Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
        
*** 20,31 ****
   */
  /*
   * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
   * Copyright (c) 2013 by Delphix. All rights reserved.
-  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
   * Copyright (c) 2017 Joyent Inc
   */
  
  /*
   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
   *      All rights reserved.
--- 20,31 ----
   */
  /*
   * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
   * Copyright (c) 2013 by Delphix. All rights reserved.
   * Copyright (c) 2017 Joyent Inc
+  * Copyright 2019 Nexenta by DDN, Inc.
   */
  
  /*
   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
   *      All rights reserved.
*** 81,91 ****
  #include <nfs/export.h>
  #include <nfs/nfssys.h>
  #include <nfs/nfs_clnt.h>
  #include <nfs/nfs_acl.h>
  #include <nfs/nfs_log.h>
- #include <nfs/nfs_cmd.h>
  #include <nfs/lm.h>
  #include <nfs/nfs_dispatch.h>
  #include <nfs/nfs4_drc.h>
  
  #include <sys/modctl.h>
--- 81,90 ----
*** 107,128 ****
  
  static struct modlinkage modlinkage = {
          MODREV_1, (void *)&modlmisc, NULL
  };
  
  kmem_cache_t *nfs_xuio_cache;
  int nfs_loaned_buffers = 0;
  
  int
  _init(void)
  {
          int status;
  
!         if ((status = nfs_srvinit()) != 0) {
!                 cmn_err(CE_WARN, "_init: nfs_srvinit failed");
!                 return (status);
!         }
  
          status = mod_install((struct modlinkage *)&modlinkage);
          if (status != 0) {
                  /*
                   * Could not load module, cleanup previous
--- 106,128 ----
  
  static struct modlinkage modlinkage = {
          MODREV_1, (void *)&modlmisc, NULL
  };
  
+ zone_key_t      nfssrv_zone_key;
+ list_t          nfssrv_globals_list;
+ krwlock_t       nfssrv_globals_rwl;
+ 
  kmem_cache_t *nfs_xuio_cache;
  int nfs_loaned_buffers = 0;
  
  int
  _init(void)
  {
          int status;
  
!         nfs_srvinit();
  
          status = mod_install((struct modlinkage *)&modlinkage);
          if (status != 0) {
                  /*
                   * Could not load module, cleanup previous
*** 175,205 ****
   * modifying those routines to avoid the duplication. For now, we optimize
   * by calling exportmatch() only after checking that the dispatch routine
   * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
   * public (i.e., not the placeholder).
   */
! #define PUBLICFH_CHECK(disp, exi, fsid, xfid) \
                  ((disp->dis_flags & RPC_PUBLICFH_OK) && \
                  ((exi->exi_export.ex_flags & EX_PUBLIC) || \
!                 (exi == exi_public && exportmatch(exi_root, \
                  fsid, xfid))))
  
  static void     nfs_srv_shutdown_all(int);
! static void     rfs4_server_start(int);
  static void     nullfree(void);
  static void     rfs_dispatch(struct svc_req *, SVCXPRT *);
  static void     acl_dispatch(struct svc_req *, SVCXPRT *);
- static void     common_dispatch(struct svc_req *, SVCXPRT *,
-                 rpcvers_t, rpcvers_t, char *,
-                 struct rpc_disptable *);
- static void     hanfsv4_failover(void);
  static  int     checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
                  bool_t, bool_t *);
  static char     *client_name(struct svc_req *req);
  static char     *client_addr(struct svc_req *req, char *buf);
  extern  int     sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
  extern  bool_t  sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
  
  #define NFSLOG_COPY_NETBUF(exi, xprt, nb)       {               \
          (nb)->maxlen = (xprt)->xp_rtaddr.maxlen;                \
          (nb)->len = (xprt)->xp_rtaddr.len;                      \
          (nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);            \
--- 175,204 ----
   * modifying those routines to avoid the duplication. For now, we optimize
   * by calling exportmatch() only after checking that the dispatch routine
   * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
   * public (i.e., not the placeholder).
   */
! #define PUBLICFH_CHECK(ne, disp, exi, fsid, xfid) \
                  ((disp->dis_flags & RPC_PUBLICFH_OK) && \
                  ((exi->exi_export.ex_flags & EX_PUBLIC) || \
!                 (exi == ne->exi_public && exportmatch(ne->exi_root, \
                  fsid, xfid))))
  
  static void     nfs_srv_shutdown_all(int);
! static void     rfs4_server_start(nfs_globals_t *, int);
  static void     nullfree(void);
  static void     rfs_dispatch(struct svc_req *, SVCXPRT *);
  static void     acl_dispatch(struct svc_req *, SVCXPRT *);
  static  int     checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
                  bool_t, bool_t *);
  static char     *client_name(struct svc_req *req);
  static char     *client_addr(struct svc_req *req, char *buf);
  extern  int     sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
  extern  bool_t  sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
+ static void     *nfs_server_zone_init(zoneid_t);
+ static void     nfs_server_zone_fini(zoneid_t, void *);
+ static void     nfs_server_zone_shutdown(zoneid_t, void *);
  
  #define NFSLOG_COPY_NETBUF(exi, xprt, nb)       {               \
          (nb)->maxlen = (xprt)->xp_rtaddr.maxlen;                \
          (nb)->len = (xprt)->xp_rtaddr.len;                      \
          (nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);            \
*** 246,303 ****
  };
  
  static SVC_CALLOUT_TABLE nfs_sct_rdma = {
          sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
  };
- rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT;
- rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT;
  
  /*
-  * Used to track the state of the server so that initialization
-  * can be done properly.
-  */
- typedef enum {
-         NFS_SERVER_STOPPED,     /* server state destroyed */
-         NFS_SERVER_STOPPING,    /* server state being destroyed */
-         NFS_SERVER_RUNNING,
-         NFS_SERVER_QUIESCED,    /* server state preserved */
-         NFS_SERVER_OFFLINE      /* server pool offline */
- } nfs_server_running_t;
- 
- static nfs_server_running_t nfs_server_upordown;
- static kmutex_t nfs_server_upordown_lock;
- static  kcondvar_t nfs_server_upordown_cv;
- 
- /*
   * DSS: distributed stable storage
   * lists of all DSS paths: current, and before last warmstart
   */
  nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
  
  int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
  bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
  
  /*
!  * RDMA wait variables.
   */
! static kcondvar_t rdma_wait_cv;
! static kmutex_t rdma_wait_mutex;
  
  /*
   * Will be called at the point the server pool is being unregistered
   * from the pool list. From that point onwards, the pool is waiting
   * to be drained and as such the server state is stale and pertains
   * to the old instantiation of the NFS server pool.
   */
  void
  nfs_srv_offline(void)
  {
!         mutex_enter(&nfs_server_upordown_lock);
!         if (nfs_server_upordown == NFS_SERVER_RUNNING) {
!                 nfs_server_upordown = NFS_SERVER_OFFLINE;
          }
!         mutex_exit(&nfs_server_upordown_lock);
  }
  
  /*
   * Will be called at the point the server pool is being destroyed so
   * all transports have been closed and no service threads are in
--- 245,302 ----
  };
  
  static SVC_CALLOUT_TABLE nfs_sct_rdma = {
          sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
  };
  
  /*
   * DSS: distributed stable storage
   * lists of all DSS paths: current, and before last warmstart
   */
  nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
  
  int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
  bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
  
  /*
!  * Stash NFS zone globals in TSD to avoid some lock contention
!  * from frequent zone_getspecific calls.
   */
! static uint_t nfs_server_tsd_key;
  
+ nfs_globals_t *
+ nfs_srv_getzg(void)
+ {
+         nfs_globals_t *ng;
+ 
+         ng = tsd_get(nfs_server_tsd_key);
+         if (ng == NULL) {
+                 ng = zone_getspecific(nfssrv_zone_key, curzone);
+                 (void) tsd_set(nfs_server_tsd_key, ng);
+         }
+ 
+         return (ng);
+ }
+ 
  /*
   * Will be called at the point the server pool is being unregistered
   * from the pool list. From that point onwards, the pool is waiting
   * to be drained and as such the server state is stale and pertains
   * to the old instantiation of the NFS server pool.
   */
  void
  nfs_srv_offline(void)
  {
!         nfs_globals_t *ng;
! 
!         ng = nfs_srv_getzg();
! 
!         mutex_enter(&ng->nfs_server_upordown_lock);
!         if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) {
!                 ng->nfs_server_upordown = NFS_SERVER_OFFLINE;
          }
!         mutex_exit(&ng->nfs_server_upordown_lock);
  }
  
  /*
   * Will be called at the point the server pool is being destroyed so
   * all transports have been closed and no service threads are in
*** 322,358 ****
          int quiesce = 1;
          nfs_srv_shutdown_all(quiesce);
  }
  
  static void
! nfs_srv_shutdown_all(int quiesce) {
!         mutex_enter(&nfs_server_upordown_lock);
          if (quiesce) {
!                 if (nfs_server_upordown == NFS_SERVER_RUNNING ||
!                         nfs_server_upordown == NFS_SERVER_OFFLINE) {
!                         nfs_server_upordown = NFS_SERVER_QUIESCED;
!                         cv_signal(&nfs_server_upordown_cv);
  
!                         /* reset DSS state, for subsequent warm restart */
                          rfs4_dss_numnewpaths = 0;
                          rfs4_dss_newpaths = NULL;
  
                          cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
                              "NFSv4 state has been preserved");
                  }
          } else {
!                 if (nfs_server_upordown == NFS_SERVER_OFFLINE) {
!                         nfs_server_upordown = NFS_SERVER_STOPPING;
!                         mutex_exit(&nfs_server_upordown_lock);
!                         rfs4_state_fini();
!                         rfs4_fini_drc(nfs4_drc);
!                         mutex_enter(&nfs_server_upordown_lock);
!                         nfs_server_upordown = NFS_SERVER_STOPPED;
!                         cv_signal(&nfs_server_upordown_cv);
                  }
          }
!         mutex_exit(&nfs_server_upordown_lock);
  }
  
  static int
  nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
                          rpcvers_t versmin, rpcvers_t versmax)
--- 321,365 ----
          int quiesce = 1;
          nfs_srv_shutdown_all(quiesce);
  }
  
  static void
! nfs_srv_shutdown_all(int quiesce)
! {
!         nfs_globals_t *ng = nfs_srv_getzg();
! 
!         mutex_enter(&ng->nfs_server_upordown_lock);
          if (quiesce) {
!                 if (ng->nfs_server_upordown == NFS_SERVER_RUNNING ||
!                     ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
!                         ng->nfs_server_upordown = NFS_SERVER_QUIESCED;
!                         cv_signal(&ng->nfs_server_upordown_cv);
  
!                         /* reset DSS state */
                          rfs4_dss_numnewpaths = 0;
                          rfs4_dss_newpaths = NULL;
  
                          cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
                              "NFSv4 state has been preserved");
                  }
          } else {
!                 if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
!                         ng->nfs_server_upordown = NFS_SERVER_STOPPING;
!                         mutex_exit(&ng->nfs_server_upordown_lock);
!                         rfs4_state_zone_fini();
!                         rfs4_fini_drc();
!                         mutex_enter(&ng->nfs_server_upordown_lock);
!                         ng->nfs_server_upordown = NFS_SERVER_STOPPED;
! 
!                         /* reset DSS state */
!                         rfs4_dss_numnewpaths = 0;
!                         rfs4_dss_newpaths = NULL;
! 
!                         cv_signal(&ng->nfs_server_upordown_cv);
                  }
          }
!         mutex_exit(&ng->nfs_server_upordown_lock);
  }
  
  static int
  nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
      rpcvers_t versmin, rpcvers_t versmax)
*** 416,425 ****
--- 423,433 ----
   * uap->fd is the fd of an open transport provider
   */
  int
  nfs_svc(struct nfs_svc_args *arg, model_t model)
  {
+         nfs_globals_t *ng;
          file_t *fp;
          SVCMASTERXPRT *xprt;
          int error;
          int readsize;
          char buf[KNC_STRSIZE];
*** 430,446 ****
--- 438,459 ----
  
  #ifdef lint
          model = model;          /* STRUCT macros don't always refer to it */
  #endif
  
+         ng = nfs_srv_getzg();
          STRUCT_SET_HANDLE(uap, model, arg);
  
          /* Check privileges in nfssys() */
  
          if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
                  return (EBADF);
  
+         /* Setup global file handle in nfs_export */
+         if ((error = nfs_export_get_rootfh(ng)) != 0) {
+                 releasef(STRUCT_FGET(uap, fd));
+                 return (error);
+         }
+ 
          /*
           * Set read buffer size to rsize
           * and add room for RPC headers.
           */
          readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
*** 463,493 ****
                  releasef(STRUCT_FGET(uap, fd));
                  kmem_free(addrmask.buf, addrmask.maxlen);
                  return (error);
          }
  
!         nfs_versmin = STRUCT_FGET(uap, versmin);
!         nfs_versmax = STRUCT_FGET(uap, versmax);
  
          /* Double check the vers min/max ranges */
!         if ((nfs_versmin > nfs_versmax) ||
!             (nfs_versmin < NFS_VERSMIN) ||
!             (nfs_versmax > NFS_VERSMAX)) {
!                 nfs_versmin = NFS_VERSMIN_DEFAULT;
!                 nfs_versmax = NFS_VERSMAX_DEFAULT;
          }
  
!         if (error =
!             nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) {
                  releasef(STRUCT_FGET(uap, fd));
                  kmem_free(addrmask.buf, addrmask.maxlen);
                  return (error);
          }
  
          /* Initialize nfsv4 server */
!         if (nfs_versmax == (rpcvers_t)NFS_V4)
!                 rfs4_server_start(STRUCT_FGET(uap, delegation));
  
          /* Create a transport handle. */
          error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
              sctp, NULL, NFS_SVCPOOL_ID, TRUE);
  
--- 476,506 ----
                  releasef(STRUCT_FGET(uap, fd));
                  kmem_free(addrmask.buf, addrmask.maxlen);
                  return (error);
          }
  
!         ng->nfs_versmin = STRUCT_FGET(uap, versmin);
!         ng->nfs_versmax = STRUCT_FGET(uap, versmax);
  
          /* Double check the vers min/max ranges */
!         if ((ng->nfs_versmin > ng->nfs_versmax) ||
!             (ng->nfs_versmin < NFS_VERSMIN) ||
!             (ng->nfs_versmax > NFS_VERSMAX)) {
!                 ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
!                 ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
          }
  
!         if (error = nfs_srv_set_sc_versions(fp, &sctp, ng->nfs_versmin,
!             ng->nfs_versmax)) {
                  releasef(STRUCT_FGET(uap, fd));
                  kmem_free(addrmask.buf, addrmask.maxlen);
                  return (error);
          }
  
          /* Initialize nfsv4 server */
!         if (ng->nfs_versmax == (rpcvers_t)NFS_V4)
!                 rfs4_server_start(ng, STRUCT_FGET(uap, delegation));
  
          /* Create a transport handle. */
          error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
              sctp, NULL, NFS_SVCPOOL_ID, TRUE);
  
*** 502,573 ****
  
          return (error);
  }
  
  static void
! rfs4_server_start(int nfs4_srv_delegation)
  {
          /*
           * Determine if the server has previously been "started" and
           * if not, do the per instance initialization
           */
!         mutex_enter(&nfs_server_upordown_lock);
  
!         if (nfs_server_upordown != NFS_SERVER_RUNNING) {
                  /* Do we need to stop and wait on the previous server? */
!                 while (nfs_server_upordown == NFS_SERVER_STOPPING ||
!                     nfs_server_upordown == NFS_SERVER_OFFLINE)
!                         cv_wait(&nfs_server_upordown_cv,
!                             &nfs_server_upordown_lock);
  
!                 if (nfs_server_upordown != NFS_SERVER_RUNNING) {
                          (void) svc_pool_control(NFS_SVCPOOL_ID,
                              SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
                          (void) svc_pool_control(NFS_SVCPOOL_ID,
                              SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
  
!                         /* is this an nfsd warm start? */
!                         if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
!                                 cmn_err(CE_NOTE, "nfs_server: "
!                                     "server was previously quiesced; "
!                                     "existing NFSv4 state will be re-used");
  
!                                 /*
!                                  * HA-NFSv4: this is also the signal
!                                  * that a Resource Group failover has
!                                  * occurred.
!                                  */
!                                 if (cluster_bootflags & CLUSTER_BOOTED)
!                                         hanfsv4_failover();
!                         } else {
!                                 /* cold start */
!                                 rfs4_state_init();
!                                 nfs4_drc = rfs4_init_drc(nfs4_drc_max,
!                                     nfs4_drc_hash);
                          }
! 
!                         /*
!                          * Check to see if delegation is to be
!                          * enabled at the server
!                          */
!                         if (nfs4_srv_delegation != FALSE)
!                                 rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE);
! 
!                         nfs_server_upordown = NFS_SERVER_RUNNING;
                  }
!                 cv_signal(&nfs_server_upordown_cv);
!         }
!         mutex_exit(&nfs_server_upordown_lock);
  }
  
  /*
   * If RDMA device available,
   * start RDMA listener.
   */
  int
  rdma_start(struct rdma_svc_args *rsa)
  {
          int error;
          rdma_xprt_group_t started_rdma_xprts;
          rdma_stat stat;
          int svc_state = 0;
  
--- 515,564 ----
  
          return (error);
  }
  
  static void
! rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation)
  {
          /*
           * Determine if the server has previously been "started" and
           * if not, do the per instance initialization
           */
!         mutex_enter(&ng->nfs_server_upordown_lock);
  
!         if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
                  /* Do we need to stop and wait on the previous server? */
!                 while (ng->nfs_server_upordown == NFS_SERVER_STOPPING ||
!                     ng->nfs_server_upordown == NFS_SERVER_OFFLINE)
!                         cv_wait(&ng->nfs_server_upordown_cv,
!                             &ng->nfs_server_upordown_lock);
  
!                 if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
                          (void) svc_pool_control(NFS_SVCPOOL_ID,
                              SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
                          (void) svc_pool_control(NFS_SVCPOOL_ID,
                              SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
  
!                         rfs4_do_server_start(ng->nfs_server_upordown,
!                             nfs4_srv_delegation,
!                             cluster_bootflags & CLUSTER_BOOTED);
  
!                         ng->nfs_server_upordown = NFS_SERVER_RUNNING;
                  }
!                 cv_signal(&ng->nfs_server_upordown_cv);
          }
!         mutex_exit(&ng->nfs_server_upordown_lock);
  }
  
  /*
   * If RDMA device available,
   * start RDMA listener.
   */
  int
  rdma_start(struct rdma_svc_args *rsa)
  {
+         nfs_globals_t *ng;
          int error;
          rdma_xprt_group_t started_rdma_xprts;
          rdma_stat stat;
          int svc_state = 0;
  
*** 576,588 ****
              (rsa->nfs_versmin < NFS_VERSMIN) ||
              (rsa->nfs_versmax > NFS_VERSMAX)) {
                  rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
                  rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
          }
-         nfs_versmin = rsa->nfs_versmin;
-         nfs_versmax = rsa->nfs_versmax;
  
          /* Set the versions in the callout table */
          __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
          __nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
          /* For the NFS_ACL program, check the max version */
          __nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
--- 567,581 ----
              (rsa->nfs_versmin < NFS_VERSMIN) ||
              (rsa->nfs_versmax > NFS_VERSMAX)) {
                  rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
                  rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
          }
  
+         ng = nfs_srv_getzg();
+         ng->nfs_versmin = rsa->nfs_versmin;
+         ng->nfs_versmax = rsa->nfs_versmax;
+ 
          /* Set the versions in the callout table */
          __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
          __nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
          /* For the NFS_ACL program, check the max version */
          __nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
*** 591,601 ****
          else
                  __nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
  
          /* Initialize nfsv4 server */
          if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
!                 rfs4_server_start(rsa->delegation);
  
          started_rdma_xprts.rtg_count = 0;
          started_rdma_xprts.rtg_listhead = NULL;
          started_rdma_xprts.rtg_poolid = rsa->poolid;
  
--- 584,594 ----
          else
                  __nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
  
          /* Initialize nfsv4 server */
          if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
!                 rfs4_server_start(ng, rsa->delegation);
  
          started_rdma_xprts.rtg_count = 0;
          started_rdma_xprts.rtg_listhead = NULL;
          started_rdma_xprts.rtg_poolid = rsa->poolid;
  
*** 608,618 ****
          while (!error) {
  
                  /*
                   * wait till either interrupted by a signal on
                   * nfs service stop/restart or signalled by a
!                  * rdma plugin attach/detatch.
                   */
  
                  stat = rdma_kwait();
  
                  /*
--- 601,611 ----
          while (!error) {
  
                  /*
                   * wait till either interrupted by a signal on
                   * nfs service stop/restart or signalled by a
!                  * rdma attach/detach.
                   */
  
                  stat = rdma_kwait();
  
                  /*
*** 659,672 ****
  /* ARGSUSED */
  void
  rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
      struct svc_req *req, cred_t *cr, bool_t ro)
  {
!         DTRACE_NFSV3_3(op__null__start, struct svc_req *, req,
!             cred_t *, cr, vnode_t *, NULL);
!         DTRACE_NFSV3_3(op__null__done, struct svc_req *, req,
!             cred_t *, cr, vnode_t *, NULL);
  }
  
  /* ARGSUSED */
  static void
  rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
--- 652,665 ----
  /* ARGSUSED */
  void
  rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
      struct svc_req *req, cred_t *cr, bool_t ro)
  {
!         DTRACE_NFSV3_4(op__null__start, struct svc_req *, req,
!             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
!         DTRACE_NFSV3_4(op__null__done, struct svc_req *, req,
!             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
  }
  
  /* ARGSUSED */
  static void
  rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
*** 1340,1356 ****
  };
  
  static struct rpc_disptable rfs_disptable[] = {
          {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
              rfscallnames_v2,
!             &rfsproccnt_v2_ptr, rfsdisptab_v2},
          {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
              rfscallnames_v3,
!             &rfsproccnt_v3_ptr, rfsdisptab_v3},
          {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
              rfscallnames_v4,
!             &rfsproccnt_v4_ptr, rfsdisptab_v4},
  };
  
  /*
   * If nfs_portmon is set, then clients are required to use privileged
   * ports (ports < IPPORT_RESERVED) in order to get NFS services.
--- 1333,1349 ----
  };
  
  static struct rpc_disptable rfs_disptable[] = {
          {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
              rfscallnames_v2,
!             rfsdisptab_v2},
          {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
              rfscallnames_v3,
!             rfsdisptab_v3},
          {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
              rfscallnames_v4,
!             rfsdisptab_v4},
  };
  
  /*
   * If nfs_portmon is set, then clients are required to use privileged
   * ports (ports < IPPORT_RESERVED) in order to get NFS services.
*** 1365,1375 ****
  #ifdef DEBUG
  static int cred_hits = 0;
  static int cred_misses = 0;
  #endif
  
- 
  #ifdef DEBUG
  /*
   * Debug code to allow disabling of rfs_dispatch() use of
   * fastxdrargs() and fastxdrres() calls for testing purposes.
   */
--- 1358,1367 ----
*** 1469,1483 ****
                          return (TRUE);
          }
          return (FALSE);
  }
  
- 
  static void
  common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
!                 rpcvers_t max_vers, char *pgmname,
!                 struct rpc_disptable *disptable)
  {
          int which;
          rpcvers_t vers;
          char *args;
          union {
--- 1461,1473 ----
                          return (TRUE);
          }
          return (FALSE);
  }
  
  static void
  common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
!     rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable)
  {
          int which;
          rpcvers_t vers;
          char *args;
          union {
*** 1506,1518 ****
--- 1496,1517 ----
          bool_t logging_enabled = FALSE;
          struct exportinfo *nfslog_exi = NULL;
          char **procnames;
          char cbuf[INET6_ADDRSTRLEN];    /* to hold both IPv4 and IPv6 addr */
          bool_t ro = FALSE;
+         nfs_globals_t *ng = nfs_srv_getzg();
+         nfs_export_t *ne = ng->nfs_export;
+         kstat_named_t *svstat, *procstat;
  
+         ASSERT(req->rq_prog == NFS_PROGRAM || req->rq_prog == NFS_ACL_PROGRAM);
+ 
          vers = req->rq_vers;
  
+         svstat = ng->svstat[req->rq_vers];
+         procstat = (req->rq_prog == NFS_PROGRAM) ?
+             ng->rfsproccnt[vers] : ng->aclproccnt[vers];
+ 
          if (vers < min_vers || vers > max_vers) {
                  svcerr_progvers(req->rq_xprt, min_vers, max_vers);
                  error++;
                  cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
                  goto done;
*** 1524,1534 ****
                  svcerr_noproc(req->rq_xprt);
                  error++;
                  goto done;
          }
  
!         (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
  
          disp = &disptable[(int)vers].dis_table[which];
          procnames = disptable[(int)vers].dis_procnames;
  
          auth_flavor = req->rq_cred.oa_flavor;
--- 1523,1533 ----
                  svcerr_noproc(req->rq_xprt);
                  error++;
                  goto done;
          }
  
!         procstat[which].value.ui64++;
  
          disp = &disptable[(int)vers].dis_table[which];
          procnames = disptable[(int)vers].dis_procnames;
  
          auth_flavor = req->rq_cred.oa_flavor;
*** 1630,1646 ****
--- 1629,1647 ----
                          anon_ok = 0;
  
                  cr = xprt->xp_cred;
                  ASSERT(cr != NULL);
  #ifdef DEBUG
+                 {
                          if (crgetref(cr) != 1) {
                                  crfree(cr);
                                  cr = crget();
                                  xprt->xp_cred = cr;
                                  cred_misses++;
                          } else
                                  cred_hits++;
+                 }
  #else
                  if (crgetref(cr) != 1) {
                          crfree(cr);
                          cr = crget();
                          xprt->xp_cred = cr;
*** 1648,1658 ****
  #endif
  
                  exi = checkexport(fsid, xfid);
  
                  if (exi != NULL) {
!                         publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid);
  
                          /*
                           * Don't allow non-V4 clients access
                           * to pseudo exports
                           */
--- 1649,1659 ----
  #endif
  
                  exi = checkexport(fsid, xfid);
  
                  if (exi != NULL) {
!                         publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid);
  
                          /*
                           * Don't allow non-V4 clients access
                           * to pseudo exports
                           */
*** 1761,1771 ****
           * the later writing of the log record.  This is done for
           * the case that a lookup is done across a non-logged public
           * file system.
           */
          if (nfslog_buffer_list != NULL) {
!                 nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id);
                  /*
                   * Is logging enabled?
                   */
                  logging_enabled = (nfslog_exi != NULL);
  
--- 1762,1772 ----
           * the later writing of the log record.  This is done for
           * the case that a lookup is done across a non-logged public
           * file system.
           */
          if (nfslog_buffer_list != NULL) {
!                 nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id);
                  /*
                   * Is logging enabled?
                   */
                  logging_enabled = (nfslog_exi != NULL);
  
*** 1844,1856 ****
          }
  
          if (exi != NULL)
                  exi_rele(exi);
  
!         global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
! 
!         global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
  }
  
  static void
  rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
  {
--- 1845,1856 ----
          }
  
          if (exi != NULL)
                  exi_rele(exi);
  
!         svstat[NFS_BADCALLS].value.ui64 += error;
!         svstat[NFS_CALLS].value.ui64++;
  }
  
  static void
  rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
  {
*** 1969,1982 ****
  };
  
  static struct rpc_disptable acl_disptable[] = {
          {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
                  aclcallnames_v2,
!                 &aclproccnt_v2_ptr, acldisptab_v2},
          {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
                  aclcallnames_v3,
!                 &aclproccnt_v3_ptr, acldisptab_v3},
  };
  
  static void
  acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
  {
--- 1969,1982 ----
  };
  
  static struct rpc_disptable acl_disptable[] = {
          {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
                  aclcallnames_v2,
!                 acldisptab_v2},
          {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
                  aclcallnames_v3,
!                 acldisptab_v3},
  };
  
  static void
  acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
  {
*** 2566,2600 ****
   * once.  It performs the following tasks:
   *      - Call sub-initialization routines (localize access to variables)
   *      - Initialize all locks
   *      - initialize the version 3 write verifier
   */
! int
  nfs_srvinit(void)
  {
-         int error;
  
!         error = nfs_exportinit();
!         if (error != 0)
!                 return (error);
!         error = rfs4_srvrinit();
!         if (error != 0) {
!                 nfs_exportfini();
!                 return (error);
!         }
          rfs_srvrinit();
          rfs3_srvrinit();
          nfsauth_init();
  
!         /* Init the stuff to control start/stop */
!         nfs_server_upordown = NFS_SERVER_STOPPED;
!         mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
!         cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
!         mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
!         cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL);
! 
!         return (0);
  }
  
  /*
   * NFS Server finalization routine. This routine is called to cleanup the
   * initialization work previously performed if the NFS server module could
--- 2566,2598 ----
   * once.  It performs the following tasks:
   *      - Call sub-initialization routines (localize access to variables)
   *      - Initialize all locks
   *      - initialize the version 3 write verifier
   */
! void
  nfs_srvinit(void)
  {
  
!         /* Truly global stuff in this module (not per zone) */
!         rw_init(&nfssrv_globals_rwl, NULL, RW_DEFAULT, NULL);
!         list_create(&nfssrv_globals_list, sizeof (nfs_globals_t),
!             offsetof(nfs_globals_t, nfs_g_link));
!         tsd_create(&nfs_server_tsd_key, NULL);
! 
!         /* The order here is important */
!         nfs_exportinit();
          rfs_srvrinit();
          rfs3_srvrinit();
+         rfs4_srvrinit();
          nfsauth_init();
  
!         /*
!          * NFS server zone-specific global variables
!          * Note the zone_init is called for the GZ here.
!          */
!         zone_key_create(&nfssrv_zone_key, nfs_server_zone_init,
!             nfs_server_zone_shutdown, nfs_server_zone_fini);
  }
  
  /*
   * NFS Server finalization routine. This routine is called to cleanup the
   * initialization work previously performed if the NFS server module could
*** 2601,2625 ****
   * not be loaded correctly.
   */
  void
  nfs_srvfini(void)
  {
          nfsauth_fini();
          rfs3_srvrfini();
          rfs_srvrfini();
          nfs_exportfini();
  
!         mutex_destroy(&nfs_server_upordown_lock);
!         cv_destroy(&nfs_server_upordown_cv);
!         mutex_destroy(&rdma_wait_mutex);
!         cv_destroy(&rdma_wait_cv);
  }
  
  /*
!  * Set up an iovec array of up to cnt pointers.
   */
  
  void
  mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
  {
          while (m != NULL && cnt-- > 0) {
                  iovp->iov_base = (caddr_t)m->b_rptr;
--- 2599,2728 ----
   * not be loaded correctly.
   */
  void
  nfs_srvfini(void)
  {
+ 
+         /*
+          * NFS server zone-specific global variables
+          * Note the zone_fini is called for the GZ here.
+          */
+         (void) zone_key_delete(nfssrv_zone_key);
+ 
+         /* The order here is important (reverse of init) */
          nfsauth_fini();
+         rfs4_srvrfini();
          rfs3_srvrfini();
          rfs_srvrfini();
          nfs_exportfini();
  
!         /* Truly global stuff in this module (not per zone) */
!         tsd_destroy(&nfs_server_tsd_key);
!         list_destroy(&nfssrv_globals_list);
!         rw_destroy(&nfssrv_globals_rwl);
  }
  
  /*
!  * Zone init, shutdown, fini functions for the NFS server
!  *
!  * This design is careful to create the entire hierarchy of
!  * NFS server "globals" (including those created by various
!  * per-module *_zone_init functions, etc.) so that all these
!  * objects have exactly the same lifetime.
!  *
!  * These objects are also kept on a list for two reasons:
!  * 1: It makes finding these in mdb _much_ easier.
!  * 2: It allows operating across all zone globals for
!  *    functions like nfs_auth.c:exi_cache_reclaim
   */
+ static void *
+ nfs_server_zone_init(zoneid_t zoneid)
+ {
+         nfs_globals_t *ng;
  
+         ng = kmem_zalloc(sizeof (*ng), KM_SLEEP);
+ 
+         ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
+         ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
+ 
+         /* Init the stuff to control start/stop */
+         ng->nfs_server_upordown = NFS_SERVER_STOPPED;
+         mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
+         cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
+         mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
+         cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL);
+ 
+         ng->nfs_zoneid = zoneid;
+ 
+         /*
+          * Order here is important.
+          * export init must precede srv init calls.
+          */
+         nfs_export_zone_init(ng);
+         rfs_stat_zone_init(ng);
+         rfs_srv_zone_init(ng);
+         rfs3_srv_zone_init(ng);
+         rfs4_srv_zone_init(ng);
+         nfsauth_zone_init(ng);
+ 
+         rw_enter(&nfssrv_globals_rwl, RW_WRITER);
+         list_insert_tail(&nfssrv_globals_list, ng);
+         rw_exit(&nfssrv_globals_rwl);
+ 
+         return (ng);
+ }
+ 
+ /* ARGSUSED */
+ static void
+ nfs_server_zone_shutdown(zoneid_t zoneid, void *data)
+ {
+         nfs_globals_t *ng;
+ 
+         ng = (nfs_globals_t *)data;
+ 
+         /*
+          * Order is like _fini, but only
+          * some modules need this hook.
+          */
+         nfsauth_zone_shutdown(ng);
+         nfs_export_zone_shutdown(ng);
+ }
+ 
+ /* ARGSUSED */
+ static void
+ nfs_server_zone_fini(zoneid_t zoneid, void *data)
+ {
+         nfs_globals_t *ng;
+ 
+         ng = (nfs_globals_t *)data;
+ 
+         rw_enter(&nfssrv_globals_rwl, RW_WRITER);
+         list_remove(&nfssrv_globals_list, ng);
+         rw_exit(&nfssrv_globals_rwl);
+ 
+         /*
+          * Order here is important.
+          * reverse order from init
+          */
+         nfsauth_zone_fini(ng);
+         rfs4_srv_zone_fini(ng);
+         rfs3_srv_zone_fini(ng);
+         rfs_srv_zone_fini(ng);
+         rfs_stat_zone_fini(ng);
+         nfs_export_zone_fini(ng);
+ 
+         mutex_destroy(&ng->nfs_server_upordown_lock);
+         cv_destroy(&ng->nfs_server_upordown_cv);
+         mutex_destroy(&ng->rdma_wait_mutex);
+         cv_destroy(&ng->rdma_wait_cv);
+ 
+         kmem_free(ng, sizeof (*ng));
+ }
+ 
+ /*
+  * Set up an iovec array of up to cnt pointers.
+  */
  void
  mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
  {
          while (m != NULL && cnt-- > 0) {
                  iovp->iov_base = (caddr_t)m->b_rptr;
*** 2853,2867 ****
--- 2956,2972 ----
                           */
  
                          /* Release the reference on the old exi value */
                          ASSERT(*exi != NULL);
                          exi_rele(*exi);
+                         *exi = NULL;
  
                          if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
                                  VN_RELE(*vpp);
                                  goto publicfh_done;
                          }
+                         /* Have a new *exi */
                  }
          }
  
  publicfh_done:
          if (mc_dvp)
*** 2884,2901 ****
  {
          char namebuf[TYPICALMAXPATHLEN];
          struct pathname pn;
          int error;
  
          /*
           * If pathname starts with '/', then set startdvp to root.
           */
          if (*path == '/') {
                  while (*path == '/')
                          path++;
  
!                 startdvp = rootdir;
          }
  
          error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
          if (error == 0) {
                  /*
--- 2989,3008 ----
  {
          char namebuf[TYPICALMAXPATHLEN];
          struct pathname pn;
          int error;
  
+         ASSERT3U(crgetzoneid(cr), ==, curzone->zone_id);
+ 
          /*
           * If pathname starts with '/', then set startdvp to root.
           */
          if (*path == '/') {
                  while (*path == '/')
                          path++;
  
!                 startdvp = ZONE_ROOTVP();
          }
  
          error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
          if (error == 0) {
                  /*
*** 2914,2924 ****
                          if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
                                  return (ENOENT);
                  }
                  VN_HOLD(startdvp);
                  error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
!                     rootdir, startdvp, cr);
          }
          if (error == ENAMETOOLONG) {
                  /*
                   * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
                   */
--- 3021,3031 ----
                          if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
                                  return (ENOENT);
                  }
                  VN_HOLD(startdvp);
                  error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
!                     ZONE_ROOTVP(), startdvp, cr);
          }
          if (error == ENAMETOOLONG) {
                  /*
                   * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
                   */
*** 2931,2941 ****
                                  return (ENOENT);
                          }
                  }
                  VN_HOLD(startdvp);
                  error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
!                     rootdir, startdvp, cr);
                  pn_free(&pn);
          }
  
          return (error);
  }
--- 3038,3048 ----
                                  return (ENOENT);
                          }
                  }
                  VN_HOLD(startdvp);
                  error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
!                     ZONE_ROOTVP(), startdvp, cr);
                  pn_free(&pn);
          }
  
          return (error);
  }
*** 3035,3206 ****
          }
  
          return (error);
  }
  
- /*
-  * Do the main work of handling HA-NFSv4 Resource Group failover on
-  * Sun Cluster.
-  * We need to detect whether any RG admin paths have been added or removed,
-  * and adjust resources accordingly.
-  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
-  * order to scale, the list and array of paths need to be held in more
-  * suitable data structures.
-  */
- static void
- hanfsv4_failover(void)
- {
-         int i, start_grace, numadded_paths = 0;
-         char **added_paths = NULL;
-         rfs4_dss_path_t *dss_path;
- 
-         /*
-          * Note: currently, rfs4_dss_pathlist cannot be NULL, since
-          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
-          * make the latter dynamically specified too, the following will
-          * need to be adjusted.
-          */
- 
-         /*
-          * First, look for removed paths: RGs that have been failed-over
-          * away from this node.
-          * Walk the "currently-serving" rfs4_dss_pathlist and, for each
-          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
-          * from nfsd. If not, that RG path has been removed.
-          *
-          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
-          * any duplicates.
-          */
-         dss_path = rfs4_dss_pathlist;
-         do {
-                 int found = 0;
-                 char *path = dss_path->path;
- 
-                 /* used only for non-HA so may not be removed */
-                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
-                         dss_path = dss_path->next;
-                         continue;
-                 }
- 
-                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
-                         int cmpret;
-                         char *newpath = rfs4_dss_newpaths[i];
- 
-                         /*
-                          * Since nfsd has sorted rfs4_dss_newpaths for us,
-                          * once the return from strcmp is negative we know
-                          * we've passed the point where "path" should be,
-                          * and can stop searching: "path" has been removed.
-                          */
-                         cmpret = strcmp(path, newpath);
-                         if (cmpret < 0)
-                                 break;
-                         if (cmpret == 0) {
-                                 found = 1;
-                                 break;
-                         }
-                 }
- 
-                 if (found == 0) {
-                         unsigned index = dss_path->index;
-                         rfs4_servinst_t *sip = dss_path->sip;
-                         rfs4_dss_path_t *path_next = dss_path->next;
- 
-                         /*
-                          * This path has been removed.
-                          * We must clear out the servinst reference to
-                          * it, since it's now owned by another
-                          * node: we should not attempt to touch it.
-                          */
-                         ASSERT(dss_path == sip->dss_paths[index]);
-                         sip->dss_paths[index] = NULL;
- 
-                         /* remove from "currently-serving" list, and destroy */
-                         remque(dss_path);
-                         /* allow for NUL */
-                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
-                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
- 
-                         dss_path = path_next;
-                 } else {
-                         /* path was found; not removed */
-                         dss_path = dss_path->next;
-                 }
-         } while (dss_path != rfs4_dss_pathlist);
- 
-         /*
-          * Now, look for added paths: RGs that have been failed-over
-          * to this node.
-          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
-          * for each path, check if it is on the "currently-serving"
-          * rfs4_dss_pathlist. If not, that RG path has been added.
-          *
-          * Note: we don't do duplicate detection here; nfsd does that for us.
-          *
-          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
-          * an upper bound for the size needed for added_paths[numadded_paths].
-          */
- 
-         /* probably more space than we need, but guaranteed to be enough */
-         if (rfs4_dss_numnewpaths > 0) {
-                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
-                 added_paths = kmem_zalloc(sz, KM_SLEEP);
-         }
- 
-         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
-         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
-                 int found = 0;
-                 char *newpath = rfs4_dss_newpaths[i];
- 
-                 dss_path = rfs4_dss_pathlist;
-                 do {
-                         char *path = dss_path->path;
- 
-                         /* used only for non-HA */
-                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
-                                 dss_path = dss_path->next;
-                                 continue;
-                         }
- 
-                         if (strncmp(path, newpath, strlen(path)) == 0) {
-                                 found = 1;
-                                 break;
-                         }
- 
-                         dss_path = dss_path->next;
-                 } while (dss_path != rfs4_dss_pathlist);
- 
-                 if (found == 0) {
-                         added_paths[numadded_paths] = newpath;
-                         numadded_paths++;
-                 }
-         }
- 
-         /* did we find any added paths? */
-         if (numadded_paths > 0) {
-                 /* create a new server instance, and start its grace period */
-                 start_grace = 1;
-                 rfs4_servinst_create(start_grace, numadded_paths, added_paths);
- 
-                 /* read in the stable storage state from these paths */
-                 rfs4_dss_readstate(numadded_paths, added_paths);
- 
-                 /*
-                  * Multiple failovers during a grace period will cause
-                  * clients of the same resource group to be partitioned
-                  * into different server instances, with different
-                  * grace periods.  Since clients of the same resource
-                  * group must be subject to the same grace period,
-                  * we need to reset all currently active grace periods.
-                  */
-                 rfs4_grace_reset_all();
-         }
- 
-         if (rfs4_dss_numnewpaths > 0)
-                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
- }
- 
  /*
   * Used by NFSv3 and NFSv4 server to query label of
   * a pathname component during lookup/access ops.
   */
  ts_label_t *
--- 3142,3151 ----