Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/nfs/nfs4_state.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_state.c
↓ open down ↓ 10 lines elided ↑ open up ↑
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
       21 +
  21   22  /*
  22   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  24   24   */
  25   25  
       26 +/*
       27 + * Copyright 2018 Nexenta Systems, Inc.
       28 + * Copyright 2019 Nexenta by DDN, Inc.
       29 + */
       30 +
  26   31  #include <sys/systm.h>
  27   32  #include <sys/kmem.h>
  28   33  #include <sys/cmn_err.h>
  29   34  #include <sys/atomic.h>
  30   35  #include <sys/clconf.h>
  31   36  #include <sys/cladm.h>
  32   37  #include <sys/flock.h>
  33   38  #include <nfs/export.h>
  34   39  #include <nfs/nfs.h>
  35   40  #include <nfs/nfs4.h>
  36   41  #include <nfs/nfssys.h>
  37   42  #include <nfs/lm.h>
  38   43  #include <sys/pathname.h>
  39   44  #include <sys/sdt.h>
  40   45  #include <sys/nvpair.h>
  41   46  
  42   47  extern u_longlong_t nfs4_srv_caller_id;
  43   48  
  44      -extern time_t rfs4_start_time;
  45   49  extern uint_t nfs4_srv_vkey;
  46   50  
  47   51  stateid4 special0 = {
  48   52          0,
  49   53          { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
  50   54  };
  51   55  
  52   56  stateid4 special1 = {
  53   57          0xffffffff,
  54   58          {
↓ open down ↓ 10 lines elided ↑ open up ↑
  65   69  /* For embedding the cluster nodeid into our clientid */
  66   70  #define CLUSTER_NODEID_SHIFT    24
  67   71  #define CLUSTER_MAX_NODEID      255
  68   72  
  69   73  #ifdef DEBUG
  70   74  int rfs4_debug;
  71   75  #endif
  72   76  
  73   77  static uint32_t rfs4_database_debug = 0x00;
  74   78  
  75      -static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf);
       79 +/* CSTYLED */
       80 +static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
  76   81  static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
  77   82  static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
  78   83  static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
  79   84  
  80   85  /*
  81   86   * Couple of simple init/destroy functions for a general waiter
  82   87   */
  83   88  void
  84   89  rfs4_sw_init(rfs4_state_wait_t *swp)
  85   90  {
↓ open down ↓ 28 lines elided ↑ open up ↑
 114  119  rfs4_sw_exit(rfs4_state_wait_t *swp)
 115  120  {
 116  121          mutex_enter(swp->sw_cv_lock);
 117  122          ASSERT(swp->sw_active == TRUE);
 118  123          swp->sw_active = FALSE;
 119  124          if (swp->sw_wait_count != 0)
 120  125                  cv_broadcast(swp->sw_cv);
 121  126          mutex_exit(swp->sw_cv_lock);
 122  127  }
 123  128  
 124      -/*
 125      - * CPR callback id -- not related to v4 callbacks
 126      - */
 127      -static callb_id_t cpr_id = 0;
 128      -
 129  129  static void
 130  130  deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
 131  131  {
 132  132          lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
 133  133          lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
 134  134  
 135  135          if (sres->status == NFS4ERR_DENIED) {
 136  136                  dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
 137  137                  bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
 138  138          }
 139  139  }
 140  140  
      141 +/*
      142 + * CPR callback id -- not related to v4 callbacks
      143 + */
      144 +static callb_id_t cpr_id = 0;
      145 +
 141  146  static void
 142  147  deep_lock_free(LOCK4res *res)
 143  148  {
 144  149          lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
 145  150  
 146  151          if (res->status == NFS4ERR_DENIED)
 147  152                  kmem_free(lo->owner_val, lo->owner_len);
 148  153  }
 149  154  
 150  155  static void
↓ open down ↓ 115 lines elided ↑ open up ↑
 266  271   */
 267  272  
 268  273  #ifdef DEBUG
 269  274  #define TABSIZE 17
 270  275  #else
 271  276  #define TABSIZE 2047
 272  277  #endif
 273  278  
 274  279  #define ADDRHASH(key) ((unsigned long)(key) >> 3)
 275  280  
 276      -/* Used to serialize create/destroy of rfs4_server_state database */
 277      -kmutex_t        rfs4_state_lock;
 278      -static rfs4_database_t *rfs4_server_state = NULL;
 279      -
 280      -/* Used to serialize lookups of clientids */
 281      -static  krwlock_t       rfs4_findclient_lock;
 282      -
 283      -/*
 284      - * For now this "table" is exposed so that the CPR callback
 285      - * function can tromp through it..
 286      - */
 287      -rfs4_table_t *rfs4_client_tab;
 288      -
 289      -static rfs4_index_t *rfs4_clientid_idx;
 290      -static rfs4_index_t *rfs4_nfsclnt_idx;
 291      -static rfs4_table_t *rfs4_clntip_tab;
 292      -static rfs4_index_t *rfs4_clntip_idx;
 293      -static rfs4_table_t *rfs4_openowner_tab;
 294      -static rfs4_index_t *rfs4_openowner_idx;
 295      -static rfs4_table_t *rfs4_state_tab;
 296      -static rfs4_index_t *rfs4_state_idx;
 297      -static rfs4_index_t *rfs4_state_owner_file_idx;
 298      -static rfs4_index_t *rfs4_state_file_idx;
 299      -static rfs4_table_t *rfs4_lo_state_tab;
 300      -static rfs4_index_t *rfs4_lo_state_idx;
 301      -static rfs4_index_t *rfs4_lo_state_owner_idx;
 302      -static rfs4_table_t *rfs4_lockowner_tab;
 303      -static rfs4_index_t *rfs4_lockowner_idx;
 304      -static rfs4_index_t *rfs4_lockowner_pid_idx;
 305      -static rfs4_table_t *rfs4_file_tab;
 306      -static rfs4_index_t *rfs4_file_idx;
 307      -static rfs4_table_t *rfs4_deleg_state_tab;
 308      -static rfs4_index_t *rfs4_deleg_idx;
 309      -static rfs4_index_t *rfs4_deleg_state_idx;
 310      -
 311  281  #define MAXTABSZ 1024*1024
 312  282  
 313  283  /* The values below are rfs4_lease_time units */
 314  284  
 315  285  #ifdef DEBUG
 316  286  #define CLIENT_CACHE_TIME 1
 317  287  #define OPENOWNER_CACHE_TIME 1
 318  288  #define STATE_CACHE_TIME 1
 319  289  #define LO_STATE_CACHE_TIME 1
 320  290  #define LOCKOWNER_CACHE_TIME 1
↓ open down ↓ 2 lines elided ↑ open up ↑
 323  293  #else
 324  294  #define CLIENT_CACHE_TIME 10
 325  295  #define OPENOWNER_CACHE_TIME 5
 326  296  #define STATE_CACHE_TIME 1
 327  297  #define LO_STATE_CACHE_TIME 1
 328  298  #define LOCKOWNER_CACHE_TIME 3
 329  299  #define FILE_CACHE_TIME 40
 330  300  #define DELEG_STATE_CACHE_TIME 1
 331  301  #endif
 332  302  
      303 +/*
      304 + * NFSv4 server state databases
      305 + *
       306 + * Initialized when the module is loaded and used by NFSv4 state tables.
      307 + * These kmem_cache databases are global, the tables that make use of these
      308 + * are per zone.
      309 + */
      310 +kmem_cache_t *rfs4_client_mem_cache;
      311 +kmem_cache_t *rfs4_clntIP_mem_cache;
      312 +kmem_cache_t *rfs4_openown_mem_cache;
      313 +kmem_cache_t *rfs4_openstID_mem_cache;
      314 +kmem_cache_t *rfs4_lockstID_mem_cache;
      315 +kmem_cache_t *rfs4_lockown_mem_cache;
      316 +kmem_cache_t *rfs4_file_mem_cache;
      317 +kmem_cache_t *rfs4_delegstID_mem_cache;
 333  318  
 334      -static time_t rfs4_client_cache_time = 0;
 335      -static time_t rfs4_clntip_cache_time = 0;
 336      -static time_t rfs4_openowner_cache_time = 0;
 337      -static time_t rfs4_state_cache_time = 0;
 338      -static time_t rfs4_lo_state_cache_time = 0;
 339      -static time_t rfs4_lockowner_cache_time = 0;
 340      -static time_t rfs4_file_cache_time = 0;
 341      -static time_t rfs4_deleg_state_cache_time = 0;
 342      -
      319 +/*
      320 + * NFSv4 state table functions
      321 + */
 343  322  static bool_t rfs4_client_create(rfs4_entry_t, void *);
 344  323  static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
 345  324  static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
 346  325  static void rfs4_client_destroy(rfs4_entry_t);
 347  326  static bool_t rfs4_client_expiry(rfs4_entry_t);
 348  327  static uint32_t clientid_hash(void *);
 349  328  static bool_t clientid_compare(rfs4_entry_t, void *);
 350  329  static void *clientid_mkkey(rfs4_entry_t);
 351  330  static uint32_t nfsclnt_hash(void *);
 352  331  static bool_t nfsclnt_compare(rfs4_entry_t, void *);
↓ open down ↓ 64 lines elided ↑ open up ↑
 417  396  void
 418  397  rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
 419  398  {
 420  399          kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
 421  400  }
 422  401  
 423  402  static rfs4_ss_pn_t *
 424  403  rfs4_ss_pnalloc(char *dir, char *leaf)
 425  404  {
 426  405          rfs4_ss_pn_t *ss_pn;
 427      -        int     dir_len, leaf_len;
      406 +        int dir_len, leaf_len;
 428  407  
 429  408          /*
 430  409           * validate we have a reasonable path
 431  410           * (account for the '/' and trailing null)
 432  411           */
 433  412          if ((dir_len = strlen(dir)) > MAXPATHLEN ||
 434  413              (leaf_len = strlen(leaf)) > MAXNAMELEN ||
 435  414              (dir_len + leaf_len + 2) > MAXPATHLEN) {
 436  415                  return (NULL);
 437  416          }
↓ open down ↓ 260 lines elided ↑ open up ↑
 698  677          }
 699  678  
 700  679  out:
 701  680          (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
 702  681          VN_RELE(dvp);
 703  682          if (dirt)
 704  683                  kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
 705  684  }
 706  685  
 707  686  static void
 708      -rfs4_ss_init(void)
      687 +rfs4_ss_init(nfs4_srv_t *nsrv4)
 709  688  {
 710  689          int npaths = 1;
 711  690          char *default_dss_path = NFS4_DSS_VAR_DIR;
 712  691  
 713  692          /* read the default stable storage state */
 714      -        rfs4_dss_readstate(npaths, &default_dss_path);
      693 +        rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
 715  694  
 716  695          rfs4_ss_enabled = 1;
 717  696  }
 718  697  
 719  698  static void
 720      -rfs4_ss_fini(void)
      699 +rfs4_ss_fini(nfs4_srv_t *nsrv4)
 721  700  {
 722  701          rfs4_servinst_t *sip;
 723  702  
 724      -        mutex_enter(&rfs4_servinst_lock);
 725      -        sip = rfs4_cur_servinst;
      703 +        mutex_enter(&nsrv4->servinst_lock);
      704 +        sip = nsrv4->nfs4_cur_servinst;
 726  705          while (sip != NULL) {
 727  706                  rfs4_dss_clear_oldstate(sip);
 728  707                  sip = sip->next;
 729  708          }
 730      -        mutex_exit(&rfs4_servinst_lock);
      709 +        mutex_exit(&nsrv4->servinst_lock);
 731  710  }
 732  711  
 733  712  /*
 734  713   * Remove all oldstate files referenced by this servinst.
 735  714   */
 736  715  static void
 737  716  rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
 738  717  {
 739  718          rfs4_oldstate_t *os_head, *osp;
 740  719  
↓ open down ↓ 23 lines elided ↑ open up ↑
 764  743                  osp = os_next;
 765  744          }
 766  745  
 767  746          rw_exit(&sip->oldstate_lock);
 768  747  }
 769  748  
 770  749  /*
 771  750   * Form the state and oldstate paths, and read in the stable storage files.
 772  751   */
 773  752  void
 774      -rfs4_dss_readstate(int npaths, char **paths)
      753 +rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
 775  754  {
 776  755          int i;
 777  756          char *state, *oldstate;
 778  757  
 779  758          state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 780  759          oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 781  760  
 782  761          for (i = 0; i < npaths; i++) {
 783  762                  char *path = paths[i];
 784  763  
↓ open down ↓ 3 lines elided ↑ open up ↑
 788  767                  /*
 789  768                   * Populate the current server instance's oldstate list.
 790  769                   *
 791  770                   * 1. Read stable storage data from old state directory,
 792  771                   *    leaving its contents alone.
 793  772                   *
 794  773                   * 2. Read stable storage data from state directory,
 795  774                   *    and move the latter's contents to old state
 796  775                   *    directory.
 797  776                   */
 798      -                rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL);
 799      -                rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate);
      777 +                /* CSTYLED */
      778 +                rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
      779 +                /* CSTYLED */
      780 +                rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
 800  781          }
 801  782  
 802  783          kmem_free(state, MAXPATHLEN);
 803  784          kmem_free(oldstate, MAXPATHLEN);
 804  785  }
 805  786  
 806  787  
 807  788  /*
 808  789   * Check if we are still in grace and if the client can be
 809  790   * granted permission to perform reclaims.
 810  791   */
 811  792  void
 812      -rfs4_ss_chkclid(rfs4_client_t *cp)
      793 +rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
 813  794  {
 814  795          rfs4_servinst_t *sip;
 815  796  
 816  797          /*
 817  798           * It should be sufficient to check the oldstate data for just
 818  799           * this client's instance. However, since our per-instance
 819  800           * client grouping is solely temporal, HA-NFSv4 RG failover
 820  801           * might result in clients of the same RG being partitioned into
 821  802           * separate instances.
 822  803           *
 823  804           * Until the client grouping is improved, we must check the
 824  805           * oldstate data for all instances with an active grace period.
 825  806           *
 826  807           * This also serves as the mechanism to remove stale oldstate data.
 827  808           * The first time we check an instance after its grace period has
 828  809           * expired, the oldstate data should be cleared.
 829  810           *
 830  811           * Start at the current instance, and walk the list backwards
 831  812           * to the first.
 832  813           */
 833      -        mutex_enter(&rfs4_servinst_lock);
 834      -        for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
      814 +        mutex_enter(&nsrv4->servinst_lock);
      815 +        for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 835  816                  rfs4_ss_chkclid_sip(cp, sip);
 836  817  
 837  818                  /* if the above check found this client, we're done */
 838  819                  if (cp->rc_can_reclaim)
 839  820                          break;
 840  821          }
 841      -        mutex_exit(&rfs4_servinst_lock);
      822 +        mutex_exit(&nsrv4->servinst_lock);
 842  823  }
 843  824  
 844  825  static void
 845  826  rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
 846  827  {
 847  828          rfs4_oldstate_t *osp, *os_head;
 848  829  
 849  830          /* short circuit everything if this server instance has no oldstate */
 850  831          rw_enter(&sip->oldstate_lock, RW_READER);
 851  832          os_head = sip->oldstate;
↓ open down ↓ 29 lines elided ↑ open up ↑
 881  862  
 882  863          rw_exit(&sip->oldstate_lock);
 883  864  }
 884  865  
 885  866  /*
 886  867   * Place client information into stable storage: 1/3.
 887  868   * First, generate the leaf filename, from the client's IP address and
 888  869   * the server-generated short-hand clientid.
 889  870   */
 890  871  void
 891      -rfs4_ss_clid(rfs4_client_t *cp)
      872 +rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
 892  873  {
 893  874          const char *kinet_ntop6(uchar_t *, char *, size_t);
 894  875          char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
 895  876          struct sockaddr *ca;
 896  877          uchar_t *b;
 897  878  
 898  879          if (rfs4_ss_enabled == 0) {
 899  880                  return;
 900  881          }
 901  882  
↓ open down ↓ 11 lines elided ↑ open up ↑
 913  894          } else if (ca->sa_family == AF_INET6) {
 914  895                  struct sockaddr_in6 *sin6;
 915  896  
 916  897                  sin6 = (struct sockaddr_in6 *)ca;
 917  898                  (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
 918  899                      buf, INET6_ADDRSTRLEN);
 919  900          }
 920  901  
 921  902          (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
 922  903              (longlong_t)cp->rc_clientid);
 923      -        rfs4_ss_clid_write(cp, leaf);
      904 +        rfs4_ss_clid_write(nsrv4, cp, leaf);
 924  905  }
 925  906  
 926  907  /*
 927  908   * Place client information into stable storage: 2/3.
 928  909   * DSS: distributed stable storage: the file may need to be written to
 929  910   * multiple directories.
 930  911   */
 931  912  static void
 932      -rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
      913 +rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
 933  914  {
 934  915          rfs4_servinst_t *sip;
 935  916  
 936  917          /*
 937  918           * It should be sufficient to write the leaf file to (all) DSS paths
 938  919           * associated with just this client's instance. However, since our
 939  920           * per-instance client grouping is solely temporal, HA-NFSv4 RG
 940  921           * failover might result in us losing DSS data.
 941  922           *
 942  923           * Until the client grouping is improved, we must write the DSS data
 943  924           * to all instances' paths. Start at the current instance, and
 944  925           * walk the list backwards to the first.
 945  926           */
 946      -        mutex_enter(&rfs4_servinst_lock);
 947      -        for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
      927 +        mutex_enter(&nsrv4->servinst_lock);
      928 +        for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 948  929                  int i, npaths = sip->dss_npaths;
 949  930  
 950  931                  /* write the leaf file to all DSS paths */
 951  932                  for (i = 0; i < npaths; i++) {
 952  933                          rfs4_dss_path_t *dss_path = sip->dss_paths[i];
 953  934  
 954  935                          /* HA-NFSv4 path might have been failed-away from us */
 955  936                          if (dss_path == NULL)
 956  937                                  continue;
 957  938  
 958  939                          rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
 959  940                  }
 960  941          }
 961      -        mutex_exit(&rfs4_servinst_lock);
      942 +        mutex_exit(&nsrv4->servinst_lock);
 962  943  }
 963  944  
 964  945  /*
 965  946   * Place client information into stable storage: 3/3.
 966  947   * Write the stable storage data to the requested file.
 967  948   */
 968  949  static void
 969  950  rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
 970  951  {
 971  952          int ioflag;
↓ open down ↓ 172 lines elided ↑ open up ↑
1144 1125          }
1145 1126  }
1146 1127  
1147 1128  /*
1148 1129   * This is called from nfssys() in order to clear server state
1149 1130   * for the specified client IP Address.
1150 1131   */
1151 1132  void
1152 1133  rfs4_clear_client_state(struct nfs4clrst_args *clr)
1153 1134  {
1154      -        (void) rfs4_dbe_walk(rfs4_client_tab, rfs4_client_scrub, clr);
     1135 +        nfs4_srv_t *nsrv4;
     1136 +        nsrv4 = nfs4_get_srv();
     1137 +        (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1155 1138  }
1156 1139  
1157 1140  /*
1158 1141   * Used to initialize the NFSv4 server's state or database.  All of
1159      - * the tables are created and timers are set. Only called when NFSv4
1160      - * service is provided.
     1142 + * the tables are created and timers are set.
1161 1143   */
1162 1144  void
1163      -rfs4_state_init()
     1145 +rfs4_state_g_init()
1164 1146  {
1165      -        int start_grace;
1166 1147          extern boolean_t rfs4_cpr_callb(void *, int);
1167      -        char *dss_path = NFS4_DSS_VAR_DIR;
1168      -        time_t start_time;
     1148 +        /*
     1149 +         * Add a CPR callback so that we can update client
     1150 +         * access times to extend the lease after a suspend
     1151 +         * and resume (using the same class as rpcmod/connmgr)
     1152 +         */
     1153 +        cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1169 1154  
1170      -        mutex_enter(&rfs4_state_lock);
     1155 +        /*
     1156 +         * NFSv4 server state databases
     1157 +         *
     1158 +         * Initialized when the module is loaded and used by NFSv4 state
     1159 +         * tables.  These kmem_cache free pools are used globally, the NFSv4
     1160 +         * state tables which make use of these kmem_cache free pools are per
     1161 +         * zone.
     1162 +         *
     1163 +         * initialize the global kmem_cache free pools which will be used by
     1164 +         * the NFSv4 state tables.
     1165 +         */
     1166 +        /* CSTYLED */
     1167 +        rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
     1168 +        /* CSTYLED */
     1169 +        rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
     1170 +        /* CSTYLED */
     1171 +        rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
     1172 +        /* CSTYLED */
     1173 +        rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
     1174 +        /* CSTYLED */
     1175 +        rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
     1176 +        /* CSTYLED */
     1177 +        rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
     1178 +        /* CSTYLED */
     1179 +        rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
     1180 +        /* CSTYLED */
     1181 +        rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1171 1182  
     1183 +        rfs4_client_clrst = rfs4_clear_client_state;
     1184 +}
     1185 +
     1186 +
     1187 +/*
     1188 + * Used at server shutdown to cleanup all of the NFSv4 server's structures
     1189 + * and other state.
     1190 + */
     1191 +void
     1192 +rfs4_state_g_fini()
     1193 +{
     1194 +        int i;
1172 1195          /*
1173      -         * If the server state database has already been initialized,
1174      -         * skip it
     1196 +         * Cleanup the CPR callback.
1175 1197           */
1176      -        if (rfs4_server_state != NULL) {
1177      -                mutex_exit(&rfs4_state_lock);
1178      -                return;
     1198 +        if (cpr_id)
     1199 +                (void) callb_delete(cpr_id);
     1200 +
     1201 +        rfs4_client_clrst = NULL;
     1202 +
     1203 +        /* free the NFSv4 state databases */
     1204 +        for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
     1205 +                kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
     1206 +                rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1179 1207          }
1180 1208  
1181      -        rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
     1209 +        rfs4_client_mem_cache = NULL;
     1210 +        rfs4_clntIP_mem_cache = NULL;
     1211 +        rfs4_openown_mem_cache = NULL;
     1212 +        rfs4_openstID_mem_cache = NULL;
     1213 +        rfs4_lockstID_mem_cache = NULL;
     1214 +        rfs4_lockown_mem_cache = NULL;
     1215 +        rfs4_file_mem_cache = NULL;
     1216 +        rfs4_delegstID_mem_cache = NULL;
1182 1217  
     1218 +        /* DSS: distributed stable storage */
     1219 +        nvlist_free(rfs4_dss_oldpaths);
     1220 +        nvlist_free(rfs4_dss_paths);
     1221 +        rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
     1222 +}
     1223 +
     1224 +/*
     1225 + * Used to initialize the per zone NFSv4 server's state
     1226 + */
     1227 +void
     1228 +rfs4_state_zone_init(nfs4_srv_t *nsrv4)
     1229 +{
     1230 +        time_t start_time;
     1231 +        int start_grace;
     1232 +        char *dss_path = NFS4_DSS_VAR_DIR;
     1233 +
     1234 +        /* DSS: distributed stable storage: initialise served paths list */
     1235 +        nsrv4->dss_pathlist = NULL;
     1236 +
1183 1237          /*
1184 1238           * Set the boot time.  If the server
1185 1239           * has been restarted quickly and has had the opportunity to
1186 1240           * service clients, then the start_time needs to be bumped
1187 1241           * regardless.  A small window but it exists...
1188 1242           */
1189 1243          start_time = gethrestime_sec();
1190      -        if (rfs4_start_time < start_time)
1191      -                rfs4_start_time = start_time;
     1244 +        if (nsrv4->rfs4_start_time < start_time)
     1245 +                nsrv4->rfs4_start_time = start_time;
1192 1246          else
1193      -                rfs4_start_time++;
     1247 +                nsrv4->rfs4_start_time++;
1194 1248  
1195      -        /* DSS: distributed stable storage: initialise served paths list */
1196      -        rfs4_dss_pathlist = NULL;
1197      -
1198 1249          /*
1199 1250           * Create the first server instance, or a new one if the server has
1200 1251           * been restarted; see above comments on rfs4_start_time. Don't
1201 1252           * start its grace period; that will be done later, to maximise the
1202 1253           * clients' recovery window.
1203 1254           */
1204 1255          start_grace = 0;
1205      -        rfs4_servinst_create(start_grace, 1, &dss_path);
     1256 +        if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
     1257 +                int i;
     1258 +                char **dss_allpaths = NULL;
     1259 +                dss_allpaths = kmem_alloc(sizeof (char *) *
     1260 +                    (rfs4_dss_numnewpaths + 1), KM_SLEEP);
     1261 +                /*
     1262 +                 * Add the default path into the list of paths for saving
     1263 +                 * state informantion.
     1264 +                 */
     1265 +                dss_allpaths[0] = dss_path;
     1266 +                for (i = 0; i < rfs4_dss_numnewpaths; i++) {
     1267 +                        dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
     1268 +                }
     1269 +                rfs4_servinst_create(nsrv4, start_grace,
     1270 +                    (rfs4_dss_numnewpaths + 1), dss_allpaths);
     1271 +                kmem_free(dss_allpaths,
     1272 +                    (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
     1273 +        } else {
     1274 +                rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
     1275 +        }
1206 1276  
1207 1277          /* reset the "first NFSv4 request" status */
1208      -        rfs4_seen_first_compound = 0;
     1278 +        nsrv4->seen_first_compound = 0;
1209 1279  
     1280 +        mutex_enter(&nsrv4->state_lock);
     1281 +
1210 1282          /*
1211      -         * Add a CPR callback so that we can update client
1212      -         * access times to extend the lease after a suspend
1213      -         * and resume (using the same class as rpcmod/connmgr)
     1283 +         * If the server state database has already been initialized,
     1284 +         * skip it
1214 1285           */
1215      -        cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
     1286 +        if (nsrv4->nfs4_server_state != NULL) {
     1287 +                mutex_exit(&nsrv4->state_lock);
     1288 +                return;
     1289 +        }
1216 1290  
     1291 +        rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
     1292 +
1217 1293          /* set the various cache timers for table creation */
1218      -        if (rfs4_client_cache_time == 0)
1219      -                rfs4_client_cache_time = CLIENT_CACHE_TIME;
1220      -        if (rfs4_openowner_cache_time == 0)
1221      -                rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1222      -        if (rfs4_state_cache_time == 0)
1223      -                rfs4_state_cache_time = STATE_CACHE_TIME;
1224      -        if (rfs4_lo_state_cache_time == 0)
1225      -                rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1226      -        if (rfs4_lockowner_cache_time == 0)
1227      -                rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1228      -        if (rfs4_file_cache_time == 0)
1229      -                rfs4_file_cache_time = FILE_CACHE_TIME;
1230      -        if (rfs4_deleg_state_cache_time == 0)
1231      -                rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
     1294 +        if (nsrv4->rfs4_client_cache_time == 0)
     1295 +                nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
     1296 +        if (nsrv4->rfs4_openowner_cache_time == 0)
     1297 +                nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
     1298 +        if (nsrv4->rfs4_state_cache_time == 0)
     1299 +                nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
     1300 +        if (nsrv4->rfs4_lo_state_cache_time == 0)
     1301 +                nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
     1302 +        if (nsrv4->rfs4_lockowner_cache_time == 0)
     1303 +                nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
     1304 +        if (nsrv4->rfs4_file_cache_time == 0)
     1305 +                nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
     1306 +        if (nsrv4->rfs4_deleg_state_cache_time == 0)
     1307 +                nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1232 1308  
1233 1309          /* Create the overall database to hold all server state */
1234      -        rfs4_server_state = rfs4_database_create(rfs4_database_debug);
     1310 +        nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1235 1311  
1236 1312          /* Now create the individual tables */
1237      -        rfs4_client_cache_time *= rfs4_lease_time;
1238      -        rfs4_client_tab = rfs4_table_create(rfs4_server_state,
     1313 +        nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
     1314 +        nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1239 1315              "Client",
1240      -            rfs4_client_cache_time,
     1316 +            nsrv4->rfs4_client_cache_time,
1241 1317              2,
1242 1318              rfs4_client_create,
1243 1319              rfs4_client_destroy,
1244 1320              rfs4_client_expiry,
1245 1321              sizeof (rfs4_client_t),
1246 1322              TABSIZE,
1247 1323              MAXTABSZ/8, 100);
1248      -        rfs4_nfsclnt_idx = rfs4_index_create(rfs4_client_tab,
     1324 +        nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1249 1325              "nfs_client_id4", nfsclnt_hash,
1250 1326              nfsclnt_compare, nfsclnt_mkkey,
1251 1327              TRUE);
1252      -        rfs4_clientid_idx = rfs4_index_create(rfs4_client_tab,
     1328 +        nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1253 1329              "client_id", clientid_hash,
1254 1330              clientid_compare, clientid_mkkey,
1255 1331              FALSE);
1256 1332  
1257      -        rfs4_clntip_cache_time = 86400 * 365;   /* about a year */
1258      -        rfs4_clntip_tab = rfs4_table_create(rfs4_server_state,
     1333 +        nsrv4->rfs4_clntip_cache_time = 86400 * 365;    /* about a year */
     1334 +        nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1259 1335              "ClntIP",
1260      -            rfs4_clntip_cache_time,
     1336 +            nsrv4->rfs4_clntip_cache_time,
1261 1337              1,
1262 1338              rfs4_clntip_create,
1263 1339              rfs4_clntip_destroy,
1264 1340              rfs4_clntip_expiry,
1265 1341              sizeof (rfs4_clntip_t),
1266 1342              TABSIZE,
1267 1343              MAXTABSZ, 100);
1268      -        rfs4_clntip_idx = rfs4_index_create(rfs4_clntip_tab,
     1344 +        nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1269 1345              "client_ip", clntip_hash,
1270 1346              clntip_compare, clntip_mkkey,
1271 1347              TRUE);
1272 1348  
1273      -        rfs4_openowner_cache_time *= rfs4_lease_time;
1274      -        rfs4_openowner_tab = rfs4_table_create(rfs4_server_state,
     1349 +        nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
     1350 +        nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1275 1351              "OpenOwner",
1276      -            rfs4_openowner_cache_time,
     1352 +            nsrv4->rfs4_openowner_cache_time,
1277 1353              1,
1278 1354              rfs4_openowner_create,
1279 1355              rfs4_openowner_destroy,
1280 1356              rfs4_openowner_expiry,
1281 1357              sizeof (rfs4_openowner_t),
1282 1358              TABSIZE,
1283 1359              MAXTABSZ, 100);
1284      -        rfs4_openowner_idx = rfs4_index_create(rfs4_openowner_tab,
     1360 +        nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1285 1361              "open_owner4", openowner_hash,
1286 1362              openowner_compare,
1287 1363              openowner_mkkey, TRUE);
1288 1364  
1289      -        rfs4_state_cache_time *= rfs4_lease_time;
1290      -        rfs4_state_tab = rfs4_table_create(rfs4_server_state,
     1365 +        nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
     1366 +        nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1291 1367              "OpenStateID",
1292      -            rfs4_state_cache_time,
     1368 +            nsrv4->rfs4_state_cache_time,
1293 1369              3,
1294 1370              rfs4_state_create,
1295 1371              rfs4_state_destroy,
1296 1372              rfs4_state_expiry,
1297 1373              sizeof (rfs4_state_t),
1298 1374              TABSIZE,
1299 1375              MAXTABSZ, 100);
1300 1376  
1301      -        rfs4_state_owner_file_idx = rfs4_index_create(rfs4_state_tab,
     1377 +        /* CSTYLED */
     1378 +        nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1302 1379              "Openowner-File",
1303 1380              state_owner_file_hash,
1304 1381              state_owner_file_compare,
1305 1382              state_owner_file_mkkey, TRUE);
1306 1383  
1307      -        rfs4_state_idx = rfs4_index_create(rfs4_state_tab,
     1384 +        nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1308 1385              "State-id", state_hash,
1309 1386              state_compare, state_mkkey, FALSE);
1310 1387  
1311      -        rfs4_state_file_idx = rfs4_index_create(rfs4_state_tab,
     1388 +        nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1312 1389              "File", state_file_hash,
1313 1390              state_file_compare, state_file_mkkey,
1314 1391              FALSE);
1315 1392  
1316      -        rfs4_lo_state_cache_time *= rfs4_lease_time;
1317      -        rfs4_lo_state_tab = rfs4_table_create(rfs4_server_state,
     1393 +        nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
     1394 +        nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1318 1395              "LockStateID",
1319      -            rfs4_lo_state_cache_time,
     1396 +            nsrv4->rfs4_lo_state_cache_time,
1320 1397              2,
1321 1398              rfs4_lo_state_create,
1322 1399              rfs4_lo_state_destroy,
1323 1400              rfs4_lo_state_expiry,
1324 1401              sizeof (rfs4_lo_state_t),
1325 1402              TABSIZE,
1326 1403              MAXTABSZ, 100);
1327 1404  
1328      -        rfs4_lo_state_owner_idx = rfs4_index_create(rfs4_lo_state_tab,
     1405 +        /* CSTYLED */
     1406 +        nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1329 1407              "lockownerxstate",
1330 1408              lo_state_lo_hash,
1331 1409              lo_state_lo_compare,
1332 1410              lo_state_lo_mkkey, TRUE);
1333 1411  
1334      -        rfs4_lo_state_idx = rfs4_index_create(rfs4_lo_state_tab,
     1412 +        nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1335 1413              "State-id",
1336 1414              lo_state_hash, lo_state_compare,
1337 1415              lo_state_mkkey, FALSE);
1338 1416  
1339      -        rfs4_lockowner_cache_time *= rfs4_lease_time;
     1417 +        nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1340 1418  
1341      -        rfs4_lockowner_tab = rfs4_table_create(rfs4_server_state,
     1419 +        nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1342 1420              "Lockowner",
1343      -            rfs4_lockowner_cache_time,
     1421 +            nsrv4->rfs4_lockowner_cache_time,
1344 1422              2,
1345 1423              rfs4_lockowner_create,
1346 1424              rfs4_lockowner_destroy,
1347 1425              rfs4_lockowner_expiry,
1348 1426              sizeof (rfs4_lockowner_t),
1349 1427              TABSIZE,
1350 1428              MAXTABSZ, 100);
1351 1429  
1352      -        rfs4_lockowner_idx = rfs4_index_create(rfs4_lockowner_tab,
     1430 +        nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1353 1431              "lock_owner4", lockowner_hash,
1354 1432              lockowner_compare,
1355 1433              lockowner_mkkey, TRUE);
1356 1434  
1357      -        rfs4_lockowner_pid_idx = rfs4_index_create(rfs4_lockowner_tab,
     1435 +        /* CSTYLED */
     1436 +        nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1358 1437              "pid", pid_hash,
1359 1438              pid_compare, pid_mkkey,
1360 1439              FALSE);
1361 1440  
1362      -        rfs4_file_cache_time *= rfs4_lease_time;
1363      -        rfs4_file_tab = rfs4_table_create(rfs4_server_state,
     1441 +        nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
     1442 +        nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1364 1443              "File",
1365      -            rfs4_file_cache_time,
     1444 +            nsrv4->rfs4_file_cache_time,
1366 1445              1,
1367 1446              rfs4_file_create,
1368 1447              rfs4_file_destroy,
1369 1448              NULL,
1370 1449              sizeof (rfs4_file_t),
1371 1450              TABSIZE,
1372 1451              MAXTABSZ, -1);
1373 1452  
1374      -        rfs4_file_idx = rfs4_index_create(rfs4_file_tab,
     1453 +        nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1375 1454              "Filehandle", file_hash,
1376 1455              file_compare, file_mkkey, TRUE);
1377 1456  
1378      -        rfs4_deleg_state_cache_time *= rfs4_lease_time;
1379      -        rfs4_deleg_state_tab = rfs4_table_create(rfs4_server_state,
     1457 +        nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
     1458 +        /* CSTYLED */
     1459 +        nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1380 1460              "DelegStateID",
1381      -            rfs4_deleg_state_cache_time,
     1461 +            nsrv4->rfs4_deleg_state_cache_time,
1382 1462              2,
1383 1463              rfs4_deleg_state_create,
1384 1464              rfs4_deleg_state_destroy,
1385 1465              rfs4_deleg_state_expiry,
1386 1466              sizeof (rfs4_deleg_state_t),
1387 1467              TABSIZE,
1388 1468              MAXTABSZ, 100);
1389      -        rfs4_deleg_idx = rfs4_index_create(rfs4_deleg_state_tab,
     1469 +        nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1390 1470              "DelegByFileClient",
1391 1471              deleg_hash,
1392 1472              deleg_compare,
1393 1473              deleg_mkkey, TRUE);
1394 1474  
1395      -        rfs4_deleg_state_idx = rfs4_index_create(rfs4_deleg_state_tab,
     1475 +        /* CSTYLED */
     1476 +        nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1396 1477              "DelegState",
1397 1478              deleg_state_hash,
1398 1479              deleg_state_compare,
1399 1480              deleg_state_mkkey, FALSE);
1400 1481  
     1482 +        mutex_exit(&nsrv4->state_lock);
     1483 +
1401 1484          /*
1402 1485           * Init the stable storage.
1403 1486           */
1404      -        rfs4_ss_init();
1405      -
1406      -        rfs4_client_clrst = rfs4_clear_client_state;
1407      -
1408      -        mutex_exit(&rfs4_state_lock);
     1487 +        rfs4_ss_init(nsrv4);
1409 1488  }
1410 1489  
1411      -
1412 1490  /*
1413      - * Used at server shutdown to cleanup all of the NFSv4 server's structures
1414      - * and other state.
     1491 + * Used at server shutdown to cleanup all of NFSv4 server's zone structures
     1492 + * and state.
1415 1493   */
1416 1494  void
1417      -rfs4_state_fini()
     1495 +rfs4_state_zone_fini()
1418 1496  {
1419 1497          rfs4_database_t *dbp;
     1498 +        nfs4_srv_t *nsrv4;
     1499 +        nsrv4 = nfs4_get_srv();
1420 1500  
1421      -        mutex_enter(&rfs4_state_lock);
     1501 +        rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1422 1502  
1423      -        if (rfs4_server_state == NULL) {
1424      -                mutex_exit(&rfs4_state_lock);
     1503 +        /*
     1504 +         * Clean up any dangling stable storage structures BEFORE calling
     1505 +         * rfs4_servinst_destroy_all() so there are no dangling structures
     1506 +         * (i.e. the srvinsts are all cleared of danglers BEFORE they get
     1507 +         * freed).
     1508 +         */
     1509 +        rfs4_ss_fini(nsrv4);
     1510 +
     1511 +        mutex_enter(&nsrv4->state_lock);
     1512 +
     1513 +        if (nsrv4->nfs4_server_state == NULL) {
     1514 +                mutex_exit(&nsrv4->state_lock);
1425 1515                  return;
1426 1516          }
1427 1517  
1428      -        rfs4_client_clrst = NULL;
     1518 +        /* destroy server instances and current instance ptr */
     1519 +        rfs4_servinst_destroy_all(nsrv4);
1429 1520  
1430      -        rfs4_set_deleg_policy(SRV_NEVER_DELEGATE);
1431      -        dbp = rfs4_server_state;
1432      -        rfs4_server_state = NULL;
     1521 +        /* reset the "first NFSv4 request" status */
     1522 +        nsrv4->seen_first_compound = 0;
1433 1523  
1434      -        /*
1435      -         * Cleanup the CPR callback.
1436      -         */
1437      -        if (cpr_id)
1438      -                (void) callb_delete(cpr_id);
     1524 +        dbp = nsrv4->nfs4_server_state;
     1525 +        nsrv4->nfs4_server_state = NULL;
1439 1526  
1440      -        rw_destroy(&rfs4_findclient_lock);
     1527 +        rw_destroy(&nsrv4->rfs4_findclient_lock);
1441 1528  
1442 1529          /* First stop all of the reaper threads in the database */
1443 1530          rfs4_database_shutdown(dbp);
1444      -        /* clean up any dangling stable storage structures */
1445      -        rfs4_ss_fini();
1446      -        /* Now actually destroy/release the database and its tables */
     1531 +
     1532 +        /*
     1533 +         * WARNING: There may be consumers of the rfs4 database still
     1534 +         * active as we destroy these.  IF that's the case, consider putting
     1535 +         * some of their _zone_fini()-like functions into the zsd key as
     1536 +         * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions.  We can
     1537 +         * maintain some ordering guarantees better that way.
     1538 +         */
     1539 +        /* Now destroy/release the database tables */
1447 1540          rfs4_database_destroy(dbp);
1448 1541  
1449 1542          /* Reset the cache timers for next time */
1450      -        rfs4_client_cache_time = 0;
1451      -        rfs4_openowner_cache_time = 0;
1452      -        rfs4_state_cache_time = 0;
1453      -        rfs4_lo_state_cache_time = 0;
1454      -        rfs4_lockowner_cache_time = 0;
1455      -        rfs4_file_cache_time = 0;
1456      -        rfs4_deleg_state_cache_time = 0;
     1543 +        nsrv4->rfs4_client_cache_time = 0;
     1544 +        nsrv4->rfs4_openowner_cache_time = 0;
     1545 +        nsrv4->rfs4_state_cache_time = 0;
     1546 +        nsrv4->rfs4_lo_state_cache_time = 0;
     1547 +        nsrv4->rfs4_lockowner_cache_time = 0;
     1548 +        nsrv4->rfs4_file_cache_time = 0;
     1549 +        nsrv4->rfs4_deleg_state_cache_time = 0;
1457 1550  
1458      -        mutex_exit(&rfs4_state_lock);
1459      -
1460      -        /* destroy server instances and current instance ptr */
1461      -        rfs4_servinst_destroy_all();
1462      -
1463      -        /* reset the "first NFSv4 request" status */
1464      -        rfs4_seen_first_compound = 0;
1465      -
1466      -        /* DSS: distributed stable storage */
1467      -        nvlist_free(rfs4_dss_oldpaths);
1468      -        nvlist_free(rfs4_dss_paths);
1469      -        rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
     1551 +        mutex_exit(&nsrv4->state_lock);
1470 1552  }
1471 1553  
1472 1554  typedef union {
1473 1555          struct {
1474 1556                  uint32_t start_time;
1475 1557                  uint32_t c_id;
1476 1558          } impl_id;
1477 1559          clientid4 id4;
1478 1560  } cid;
1479 1561  
↓ open down ↓ 94 lines elided ↑ open up ↑
1574 1656                  cp->rc_ss_remove = 1;
1575 1657          return (cp_expired);
1576 1658  }
1577 1659  
1578 1660  /*
1579 1661   * Remove the leaf file from all distributed stable storage paths.
1580 1662   */
1581 1663  static void
1582 1664  rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1583 1665  {
     1666 +        nfs4_srv_t *nsrv4;
1584 1667          rfs4_servinst_t *sip;
1585 1668          char *leaf = cp->rc_ss_pn->leaf;
1586 1669  
1587 1670          /*
1588 1671           * since the state files are written to all DSS
1589 1672           * paths we must remove this leaf file instance
1590 1673           * from all server instances.
1591 1674           */
1592 1675  
1593      -        mutex_enter(&rfs4_servinst_lock);
1594      -        for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
     1676 +        nsrv4 = nfs4_get_srv();
     1677 +        mutex_enter(&nsrv4->servinst_lock);
     1678 +        for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1595 1679                  /* remove the leaf file associated with this server instance */
1596 1680                  rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1597 1681          }
1598      -        mutex_exit(&rfs4_servinst_lock);
     1682 +        mutex_exit(&nsrv4->servinst_lock);
1599 1683  }
1600 1684  
1601 1685  static void
1602 1686  rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1603 1687  {
1604 1688          int i, npaths = sip->dss_npaths;
1605 1689  
1606 1690          for (i = 0; i < npaths; i++) {
1607 1691                  rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1608 1692                  char *path, *dir;
↓ open down ↓ 47 lines elided ↑ open up ↑
1656 1740  }
1657 1741  
1658 1742  static bool_t
1659 1743  rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1660 1744  {
1661 1745          rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1662 1746          nfs_client_id4 *client = (nfs_client_id4 *)arg;
1663 1747          struct sockaddr *ca;
1664 1748          cid *cidp;
1665 1749          scid_confirm_verf *scvp;
     1750 +        nfs4_srv_t *nsrv4;
1666 1751  
     1752 +        nsrv4 = nfs4_get_srv();
     1753 +
1667 1754          /* Get a clientid to give to the client */
1668 1755          cidp = (cid *)&cp->rc_clientid;
1669      -        cidp->impl_id.start_time = rfs4_start_time;
     1756 +        cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1670 1757          cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1671 1758  
1672 1759          /* If we are booted as a cluster node, embed our nodeid */
1673 1760          if (cluster_bootflags & CLUSTER_BOOTED)
1674 1761                  embed_nodeid(cidp);
1675 1762  
1676 1763          /* Allocate and copy client's client id value */
1677 1764          cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1678 1765          cp->rc_nfs_client.id_len = client->id_len;
1679 1766          bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
↓ open down ↓ 37 lines elided ↑ open up ↑
1717 1804          mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1718 1805          cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1719 1806          cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1720 1807  
1721 1808          /*
1722 1809           * Associate the client_t with the current server instance.
1723 1810           * The hold is solely to satisfy the calling requirement of
1724 1811           * rfs4_servinst_assign(). In this case it's not strictly necessary.
1725 1812           */
1726 1813          rfs4_dbe_hold(cp->rc_dbe);
1727      -        rfs4_servinst_assign(cp, rfs4_cur_servinst);
     1814 +        rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1728 1815          rfs4_dbe_rele(cp->rc_dbe);
1729 1816  
1730 1817          return (TRUE);
1731 1818  }
1732 1819  
1733 1820  /*
1734 1821   * Caller wants to generate/update the setclientid_confirm verifier
1735 1822   * associated with a client.  This is done during the SETCLIENTID
1736 1823   * processing.
1737 1824   */
↓ open down ↓ 10 lines elided ↑ open up ↑
1748 1835  void
1749 1836  rfs4_client_rele(rfs4_client_t *cp)
1750 1837  {
1751 1838          rfs4_dbe_rele(cp->rc_dbe);
1752 1839  }
1753 1840  
1754 1841  rfs4_client_t *
1755 1842  rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1756 1843  {
1757 1844          rfs4_client_t *cp;
     1845 +        nfs4_srv_t *nsrv4;
     1846 +        nsrv4 = nfs4_get_srv();
1758 1847  
1759 1848  
1760 1849          if (oldcp) {
1761      -                rw_enter(&rfs4_findclient_lock, RW_WRITER);
     1850 +                rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1762 1851                  rfs4_dbe_hide(oldcp->rc_dbe);
1763 1852          } else {
1764      -                rw_enter(&rfs4_findclient_lock, RW_READER);
     1853 +                rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1765 1854          }
1766 1855  
1767      -        cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_nfsclnt_idx, client,
     1856 +        cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1768 1857              create, (void *)client, RFS4_DBS_VALID);
1769 1858  
1770 1859          if (oldcp)
1771 1860                  rfs4_dbe_unhide(oldcp->rc_dbe);
1772 1861  
1773      -        rw_exit(&rfs4_findclient_lock);
     1862 +        rw_exit(&nsrv4->rfs4_findclient_lock);
1774 1863  
1775 1864          return (cp);
1776 1865  }
1777 1866  
1778 1867  rfs4_client_t *
1779 1868  rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1780 1869  {
1781 1870          rfs4_client_t *cp;
1782 1871          bool_t create = FALSE;
1783 1872          cid *cidp = (cid *)&clientid;
     1873 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
1784 1874  
1785 1875          /* If we're a cluster and the nodeid isn't right, short-circuit */
1786 1876          if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1787 1877                  return (NULL);
1788 1878  
1789      -        rw_enter(&rfs4_findclient_lock, RW_READER);
     1879 +        rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1790 1880  
1791      -        cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, &clientid,
     1881 +        cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1792 1882              &create, NULL, RFS4_DBS_VALID);
1793 1883  
1794      -        rw_exit(&rfs4_findclient_lock);
     1884 +        rw_exit(&nsrv4->rfs4_findclient_lock);
1795 1885  
1796 1886          if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1797 1887                  rfs4_client_rele(cp);
1798 1888                  return (NULL);
1799 1889          } else {
1800 1890                  return (cp);
1801 1891          }
1802 1892  }
1803 1893  
1804 1894  static uint32_t
↓ open down ↓ 87 lines elided ↑ open up ↑
1892 1982                  return (FALSE);
1893 1983          cp->ri_no_referrals = 1;
1894 1984  
1895 1985          return (TRUE);
1896 1986  }
1897 1987  
1898 1988  rfs4_clntip_t *
1899 1989  rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1900 1990  {
1901 1991          rfs4_clntip_t *cp;
     1992 +        nfs4_srv_t *nsrv4;
1902 1993  
1903      -        rw_enter(&rfs4_findclient_lock, RW_READER);
     1994 +        nsrv4 = nfs4_get_srv();
1904 1995  
1905      -        cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
     1996 +        rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
     1997 +
     1998 +        cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1906 1999              create, addr, RFS4_DBS_VALID);
1907 2000  
1908      -        rw_exit(&rfs4_findclient_lock);
     2001 +        rw_exit(&nsrv4->rfs4_findclient_lock);
1909 2002  
1910 2003          return (cp);
1911 2004  }
1912 2005  
1913 2006  void
1914 2007  rfs4_invalidate_clntip(struct sockaddr *addr)
1915 2008  {
1916 2009          rfs4_clntip_t *cp;
1917 2010          bool_t create = FALSE;
     2011 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
1918 2012  
1919      -        rw_enter(&rfs4_findclient_lock, RW_READER);
     2013 +        rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1920 2014  
1921      -        cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
     2015 +        cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1922 2016              &create, NULL, RFS4_DBS_VALID);
1923 2017          if (cp == NULL) {
1924      -                rw_exit(&rfs4_findclient_lock);
     2018 +                rw_exit(&nsrv4->rfs4_findclient_lock);
1925 2019                  return;
1926 2020          }
1927 2021          rfs4_dbe_invalidate(cp->ri_dbe);
1928 2022          rfs4_dbe_rele(cp->ri_dbe);
1929 2023  
1930      -        rw_exit(&rfs4_findclient_lock);
     2024 +        rw_exit(&nsrv4->rfs4_findclient_lock);
1931 2025  }
1932 2026  
1933 2027  bool_t
1934 2028  rfs4_lease_expired(rfs4_client_t *cp)
1935 2029  {
1936 2030          bool_t rc;
1937 2031  
1938 2032          rfs4_dbe_lock(cp->rc_dbe);
1939 2033  
1940 2034          /*
↓ open down ↓ 127 lines elided ↑ open up ↑
2068 2162  
2069 2163  static bool_t
2070 2164  rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2071 2165  {
2072 2166          rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2073 2167          rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2074 2168          open_owner4 *openowner = &argp->ro_owner;
2075 2169          seqid4 seqid = argp->ro_open_seqid;
2076 2170          rfs4_client_t *cp;
2077 2171          bool_t create = FALSE;
     2172 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2078 2173  
2079      -        rw_enter(&rfs4_findclient_lock, RW_READER);
     2174 +        rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2080 2175  
2081      -        cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
     2176 +        cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2082 2177              &openowner->clientid,
2083 2178              &create, NULL, RFS4_DBS_VALID);
2084 2179  
2085      -        rw_exit(&rfs4_findclient_lock);
     2180 +        rw_exit(&nsrv4->rfs4_findclient_lock);
2086 2181  
2087 2182          if (cp == NULL)
2088 2183                  return (FALSE);
2089 2184  
2090 2185          oo->ro_reply_fh.nfs_fh4_len = 0;
2091 2186          oo->ro_reply_fh.nfs_fh4_val = NULL;
2092 2187  
2093 2188          oo->ro_owner.clientid = openowner->clientid;
2094 2189          oo->ro_owner.owner_val =
2095 2190              kmem_alloc(openowner->owner_len, KM_SLEEP);
↓ open down ↓ 21 lines elided ↑ open up ↑
2117 2212          rfs4_dbe_unlock(cp->rc_dbe);
2118 2213  
2119 2214          return (TRUE);
2120 2215  }
2121 2216  
2122 2217  rfs4_openowner_t *
2123 2218  rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2124 2219  {
2125 2220          rfs4_openowner_t *oo;
2126 2221          rfs4_openowner_t arg;
     2222 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2127 2223  
2128 2224          arg.ro_owner = *openowner;
2129 2225          arg.ro_open_seqid = seqid;
2130      -        oo = (rfs4_openowner_t *)rfs4_dbsearch(rfs4_openowner_idx, openowner,
     2226 +        /* CSTYLED */
     2227 +        oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2131 2228              create, &arg, RFS4_DBS_VALID);
2132 2229  
2133 2230          return (oo);
2134 2231  }
2135 2232  
2136 2233  void
2137 2234  rfs4_update_open_sequence(rfs4_openowner_t *oo)
2138 2235  {
2139 2236  
2140 2237          rfs4_dbe_lock(oo->ro_dbe);
↓ open down ↓ 122 lines elided ↑ open up ↑
2263 2360          return (TRUE);
2264 2361  }
2265 2362  
2266 2363  static bool_t
2267 2364  rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2268 2365  {
2269 2366          rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2270 2367          lock_owner4 *lockowner = (lock_owner4 *)arg;
2271 2368          rfs4_client_t *cp;
2272 2369          bool_t create = FALSE;
     2370 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2273 2371  
2274      -        rw_enter(&rfs4_findclient_lock, RW_READER);
     2372 +        rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2275 2373  
2276      -        cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
     2374 +        cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2277 2375              &lockowner->clientid,
2278 2376              &create, NULL, RFS4_DBS_VALID);
2279 2377  
2280      -        rw_exit(&rfs4_findclient_lock);
     2378 +        rw_exit(&nsrv4->rfs4_findclient_lock);
2281 2379  
2282 2380          if (cp == NULL)
2283 2381                  return (FALSE);
2284 2382  
2285 2383          /* Reference client */
2286 2384          lo->rl_client = cp;
2287 2385          lo->rl_owner.clientid = lockowner->clientid;
2288 2386          lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2289 2387          bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2290 2388              lockowner->owner_len);
2291 2389          lo->rl_owner.owner_len = lockowner->owner_len;
2292 2390          lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2293 2391  
2294 2392          return (TRUE);
2295 2393  }
2296 2394  
2297 2395  rfs4_lockowner_t *
2298 2396  rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2299 2397  {
2300 2398          rfs4_lockowner_t *lo;
     2399 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2301 2400  
2302      -        lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_idx, lockowner,
     2401 +        /* CSTYLED */
     2402 +        lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2303 2403              create, lockowner, RFS4_DBS_VALID);
2304 2404  
2305 2405          return (lo);
2306 2406  }
2307 2407  
2308 2408  rfs4_lockowner_t *
2309 2409  rfs4_findlockowner_by_pid(pid_t pid)
2310 2410  {
2311 2411          rfs4_lockowner_t *lo;
2312 2412          bool_t create = FALSE;
     2413 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2313 2414  
2314      -        lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_pid_idx,
     2415 +        lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2315 2416              (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2316 2417  
2317 2418          return (lo);
2318 2419  }
2319 2420  
2320 2421  
2321 2422  static uint32_t
2322 2423  file_hash(void *key)
2323 2424  {
2324 2425          return (ADDRHASH(key));
↓ open down ↓ 90 lines elided ↑ open up ↑
2415 2516          mutex_exit(&vp->v_vsd_lock);
2416 2517  
2417 2518          return (TRUE);
2418 2519  }
2419 2520  
2420 2521  rfs4_file_t *
2421 2522  rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2422 2523  {
2423 2524          rfs4_file_t *fp;
2424 2525          rfs4_fcreate_arg arg;
     2526 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2425 2527  
2426 2528          arg.vp = vp;
2427 2529          arg.fh = fh;
2428 2530  
2429 2531          if (*create == TRUE)
2430      -                fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
     2532 +                /* CSTYLED */
     2533 +                fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2431 2534                      &arg, RFS4_DBS_VALID);
2432 2535          else {
2433 2536                  mutex_enter(&vp->v_vsd_lock);
2434 2537                  fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2435 2538                  if (fp) {
2436 2539                          rfs4_dbe_lock(fp->rf_dbe);
2437 2540                          if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2438 2541                              (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2439 2542                                  rfs4_dbe_unlock(fp->rf_dbe);
2440 2543                                  fp = NULL;
↓ open down ↓ 14 lines elided ↑ open up ↑
2455 2558   * assumes that a file struct that has a NULL vnode pointer is marked
2456 2559   * at 'invalid' and will not be found in the db the second time
2457 2560   * around.
2458 2561   */
2459 2562  rfs4_file_t *
2460 2563  rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2461 2564  {
2462 2565          rfs4_file_t *fp;
2463 2566          rfs4_fcreate_arg arg;
2464 2567          bool_t screate = *create;
     2568 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2465 2569  
2466 2570          if (screate == FALSE) {
2467 2571                  mutex_enter(&vp->v_vsd_lock);
2468 2572                  fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2469 2573                  if (fp) {
2470 2574                          rfs4_dbe_lock(fp->rf_dbe);
2471 2575                          if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2472 2576                              (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2473 2577                                  rfs4_dbe_unlock(fp->rf_dbe);
2474 2578                                  mutex_exit(&vp->v_vsd_lock);
↓ open down ↓ 10 lines elided ↑ open up ↑
2485 2589                                  }
2486 2590                          }
2487 2591                  } else {
2488 2592                          mutex_exit(&vp->v_vsd_lock);
2489 2593                  }
2490 2594          } else {
2491 2595  retry:
2492 2596                  arg.vp = vp;
2493 2597                  arg.fh = fh;
2494 2598  
2495      -                fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
2496      -                    &arg, RFS4_DBS_VALID);
     2599 +                fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
     2600 +                    create, &arg, RFS4_DBS_VALID);
2497 2601                  if (fp != NULL) {
2498 2602                          rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2499 2603                          if (fp->rf_vp == NULL) {
2500 2604                                  rw_exit(&fp->rf_file_rwlock);
2501 2605                                  rfs4_file_rele(fp);
2502 2606                                  *create = screate;
2503 2607                                  goto retry;
2504 2608                          }
2505 2609                  }
2506 2610          }
↓ open down ↓ 134 lines elided ↑ open up ↑
2641 2745          if (unlock_fp == TRUE)
2642 2746                  rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2643 2747          rfs4_dbe_rele(lsp->rls_dbe);
2644 2748  }
2645 2749  
2646 2750  static rfs4_lo_state_t *
2647 2751  rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2648 2752  {
2649 2753          rfs4_lo_state_t *lsp;
2650 2754          bool_t create = FALSE;
     2755 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2651 2756  
2652      -        lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_idx, id,
     2757 +        lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2653 2758              &create, NULL, RFS4_DBS_VALID);
2654 2759          if (lock_fp == TRUE && lsp != NULL)
2655 2760                  rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2656 2761  
2657 2762          return (lsp);
2658 2763  }
2659 2764  
2660 2765  
2661 2766  static uint32_t
2662 2767  lo_state_lo_hash(void *key)
↓ open down ↓ 18 lines elided ↑ open up ↑
2681 2786  {
2682 2787          return (u_entry);
2683 2788  }
2684 2789  
2685 2790  rfs4_lo_state_t *
2686 2791  rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2687 2792      bool_t *create)
2688 2793  {
2689 2794          rfs4_lo_state_t *lsp;
2690 2795          rfs4_lo_state_t arg;
     2796 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2691 2797  
2692 2798          arg.rls_locker = lo;
2693 2799          arg.rls_state = sp;
2694 2800  
2695      -        lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_owner_idx, &arg,
2696      -            create, &arg, RFS4_DBS_VALID);
     2801 +        lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
     2802 +            &arg, create, &arg, RFS4_DBS_VALID);
2697 2803  
2698 2804          return (lsp);
2699 2805  }
2700 2806  
2701 2807  static stateid_t
2702 2808  get_stateid(id_t eid)
2703 2809  {
2704 2810          stateid_t id;
     2811 +        nfs4_srv_t *nsrv4;
2705 2812  
2706      -        id.bits.boottime = rfs4_start_time;
     2813 +        nsrv4 = nfs4_get_srv();
     2814 +
     2815 +        id.bits.boottime = nsrv4->rfs4_start_time;
2707 2816          id.bits.ident = eid;
2708 2817          id.bits.chgseq = 0;
2709 2818          id.bits.type = 0;
2710 2819          id.bits.pid = 0;
2711 2820  
2712 2821          /*
2713 2822           * If we are booted as a cluster node, embed our nodeid.
2714 2823           * We've already done sanity checks in rfs4_client_create() so no
2715 2824           * need to repeat them here.
2716 2825           */
↓ open down ↓ 235 lines elided ↑ open up ↑
2952 3061  
2953 3062          /* And now with the openowner */
2954 3063          rfs4_client_rele(dsp->rds_client);
2955 3064          dsp->rds_client = NULL;
2956 3065  }
2957 3066  
2958 3067  rfs4_deleg_state_t *
2959 3068  rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
2960 3069  {
2961 3070          rfs4_deleg_state_t ds, *dsp;
     3071 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2962 3072  
2963 3073          ds.rds_client = sp->rs_owner->ro_client;
2964 3074          ds.rds_finfo = sp->rs_finfo;
2965 3075  
2966      -        dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_idx, &ds,
     3076 +        dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
2967 3077              create, &ds, RFS4_DBS_VALID);
2968 3078  
2969 3079          return (dsp);
2970 3080  }
2971 3081  
2972 3082  rfs4_deleg_state_t *
2973 3083  rfs4_finddelegstate(stateid_t *id)
2974 3084  {
2975 3085          rfs4_deleg_state_t *dsp;
2976 3086          bool_t create = FALSE;
     3087 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
2977 3088  
2978      -        dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_state_idx, id,
2979      -            &create, NULL, RFS4_DBS_VALID);
     3089 +        dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
     3090 +            id, &create, NULL, RFS4_DBS_VALID);
2980 3091  
2981 3092          return (dsp);
2982 3093  }
2983 3094  
2984 3095  void
2985 3096  rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
2986 3097  {
2987 3098          rfs4_dbe_rele(dsp->rds_dbe);
2988 3099  }
2989 3100  
↓ open down ↓ 94 lines elided ↑ open up ↑
3084 3195  static void *
3085 3196  state_file_mkkey(rfs4_entry_t u_entry)
3086 3197  {
3087 3198          rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3088 3199  
3089 3200          return (sp->rs_finfo);
3090 3201  }
3091 3202  
3092 3203  rfs4_state_t *
3093 3204  rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3094      -        bool_t *create)
     3205 +    bool_t *create)
3095 3206  {
3096 3207          rfs4_state_t *sp;
3097 3208          rfs4_state_t key;
     3209 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
3098 3210  
3099 3211          key.rs_owner = oo;
3100 3212          key.rs_finfo = fp;
3101 3213  
3102      -        sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_owner_file_idx, &key,
3103      -            create, &key, RFS4_DBS_VALID);
     3214 +        sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
     3215 +            &key, create, &key, RFS4_DBS_VALID);
3104 3216  
3105 3217          return (sp);
3106 3218  }
3107 3219  
3108 3220  /* This returns ANY state struct that refers to this file */
3109 3221  static rfs4_state_t *
3110 3222  rfs4_findstate_by_file(rfs4_file_t *fp)
3111 3223  {
3112 3224          bool_t create = FALSE;
     3225 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
3113 3226  
3114      -        return ((rfs4_state_t *)rfs4_dbsearch(rfs4_state_file_idx, fp,
     3227 +        return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3115 3228              &create, fp, RFS4_DBS_VALID));
3116 3229  }
3117 3230  
3118 3231  static bool_t
3119 3232  rfs4_state_expiry(rfs4_entry_t u_entry)
3120 3233  {
3121 3234          rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3122 3235  
3123 3236          if (rfs4_dbe_is_invalid(sp->rs_dbe))
3124 3237                  return (TRUE);
↓ open down ↓ 30 lines elided ↑ open up ↑
3155 3268          rfs4_dbe_unlock(oo->ro_dbe);
3156 3269  
3157 3270          return (TRUE);
3158 3271  }
3159 3272  
3160 3273  static rfs4_state_t *
3161 3274  rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3162 3275  {
3163 3276          rfs4_state_t *sp;
3164 3277          bool_t create = FALSE;
     3278 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
3165 3279  
3166      -        sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_idx, id,
     3280 +        sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3167 3281              &create, NULL, find_invalid);
3168 3282          if (lock_fp == TRUE && sp != NULL)
3169 3283                  rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3170 3284  
3171 3285          return (sp);
3172 3286  }
3173 3287  
3174 3288  void
3175 3289  rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3176 3290      cred_t *cr)
↓ open down ↓ 47 lines elided ↑ open up ↑
3224 3338          rfs4_client_state_remove(cp);
3225 3339  
3226 3340          /* Release the client */
3227 3341          rfs4_client_rele(cp);
3228 3342  }
3229 3343  
3230 3344  nfsstat4
3231 3345  rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3232 3346  {
3233 3347          cid *cidp = (cid *) cp;
     3348 +        nfs4_srv_t *nsrv4;
3234 3349  
     3350 +        nsrv4 = nfs4_get_srv();
     3351 +
3235 3352          /*
3236 3353           * If we are booted as a cluster node, check the embedded nodeid.
3237 3354           * If it indicates that this clientid was generated on another node,
3238 3355           * inform the client accordingly.
3239 3356           */
3240 3357          if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3241 3358                  return (NFS4ERR_STALE_CLIENTID);
3242 3359  
3243 3360          /*
3244 3361           * If the server start time matches the time provided
3245 3362           * by the client (via the clientid) and this is NOT a
3246 3363           * setclientid_confirm then return EXPIRED.
3247 3364           */
3248      -        if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time)
     3365 +        if (!setclid_confirm &&
     3366 +            cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3249 3367                  return (NFS4ERR_EXPIRED);
3250 3368  
3251 3369          return (NFS4ERR_STALE_CLIENTID);
3252 3370  }
3253 3371  
3254 3372  /*
3255 3373   * This is used when a stateid has not been found amongst the
3256 3374   * current server's state.  Check the stateid to see if it
3257 3375   * was from this server instantiation or not.
3258 3376   */
3259 3377  static nfsstat4
3260 3378  what_stateid_error(stateid_t *id, stateid_type_t type)
3261 3379  {
     3380 +        nfs4_srv_t *nsrv4;
     3381 +
     3382 +        nsrv4 = nfs4_get_srv();
     3383 +
3262 3384          /* If we are booted as a cluster node, was stateid locally generated? */
3263 3385          if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3264 3386                  return (NFS4ERR_STALE_STATEID);
3265 3387  
3266 3388          /* If types don't match then no use checking further */
3267 3389          if (type != id->bits.type)
3268 3390                  return (NFS4ERR_BAD_STATEID);
3269 3391  
3270 3392          /* From a different server instantiation, return STALE */
3271      -        if (id->bits.boottime != rfs4_start_time)
     3393 +        if (id->bits.boottime != nsrv4->rfs4_start_time)
3272 3394                  return (NFS4ERR_STALE_STATEID);
3273 3395  
3274 3396          /*
3275 3397           * From this server but the state is most likely beyond lease
3276 3398           * timeout: return NFS4ERR_EXPIRED.  However, there is the
3277 3399           * case of a delegation stateid.  For delegations, there is a
3278 3400           * case where the state can be removed without the client's
3279 3401           * knowledge/consent: revocation.  In the case of delegation
3280 3402           * revocation, the delegation state will be removed and will
3281 3403           * not be found.  If the client does something like a
3282 3404           * DELEGRETURN or even a READ/WRITE with a delegation stateid
3283 3405           * that has been revoked, the server should return BAD_STATEID
3284 3406           * instead of the more common EXPIRED error.
3285 3407           */
3286      -        if (id->bits.boottime == rfs4_start_time) {
     3408 +        if (id->bits.boottime == nsrv4->rfs4_start_time) {
3287 3409                  if (type == DELEGID)
3288 3410                          return (NFS4ERR_BAD_STATEID);
3289 3411                  else
3290 3412                          return (NFS4ERR_EXPIRED);
3291 3413          }
3292 3414  
3293 3415          return (NFS4ERR_BAD_STATEID);
3294 3416  }
3295 3417  
3296 3418  /*
↓ open down ↓ 481 lines elided ↑ open up ↑
3778 3900   */
3779 3901  void
3780 3902  rfs4_close_all_state(rfs4_file_t *fp)
3781 3903  {
3782 3904          rfs4_state_t *sp;
3783 3905  
3784 3906          rfs4_dbe_lock(fp->rf_dbe);
3785 3907  
3786 3908  #ifdef DEBUG
3787 3909          /* only applies when server is handing out delegations */
3788      -        if (rfs4_deleg_policy != SRV_NEVER_DELEGATE)
     3910 +        if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3789 3911                  ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3790 3912  #endif
3791 3913  
3792 3914          /* No delegations for this file */
3793 3915          ASSERT(list_is_empty(&fp->rf_delegstatelist));
3794 3916  
3795 3917          /* Make sure that it can not be found */
3796 3918          rfs4_dbe_invalidate(fp->rf_dbe);
3797 3919  
3798 3920          if (fp->rf_vp == NULL) {
↓ open down ↓ 185 lines elided ↑ open up ↑
3984 4106                  }
3985 4107                  rfs4_dbe_invalidate(fp->rf_dbe);
3986 4108          }
3987 4109  }
3988 4110  
3989 4111  /*
3990 4112   * Given a directory that is being unexported, cleanup/release all
3991 4113   * state in the server that refers to objects residing underneath this
3992 4114   * particular export.  The ordering of the release is important.
3993 4115   * Lock_owner, then state and then file.
     4116 + *
     4117 + * NFS zones note: nfs_export.c:unexport() calls this from a
     4118 + * thread in the global zone for NGZ data structures, so we
     4119 + * CANNOT use zone_getspecific anywhere in this code path.
3994 4120   */
3995 4121  void
3996      -rfs4_clean_state_exi(struct exportinfo *exi)
     4122 +rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
3997 4123  {
3998      -        mutex_enter(&rfs4_state_lock);
     4124 +        nfs_globals_t *ng;
     4125 +        nfs4_srv_t *nsrv4;
3999 4126  
4000      -        if (rfs4_server_state == NULL) {
4001      -                mutex_exit(&rfs4_state_lock);
     4127 +        ng = ne->ne_globals;
     4128 +        ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
     4129 +        nsrv4 = ng->nfs4_srv;
     4130 +
     4131 +        mutex_enter(&nsrv4->state_lock);
     4132 +
     4133 +        if (nsrv4->nfs4_server_state == NULL) {
     4134 +                mutex_exit(&nsrv4->state_lock);
4002 4135                  return;
4003 4136          }
4004 4137  
4005      -        rfs4_dbe_walk(rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4006      -        rfs4_dbe_walk(rfs4_state_tab, rfs4_state_walk_callout, exi);
4007      -        rfs4_dbe_walk(rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4008      -        rfs4_dbe_walk(rfs4_file_tab, rfs4_file_walk_callout, exi);
     4138 +        rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
     4139 +            rfs4_lo_state_walk_callout, exi);
     4140 +        rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
     4141 +        rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
     4142 +            rfs4_deleg_state_walk_callout, exi);
     4143 +        rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4009 4144  
4010      -        mutex_exit(&rfs4_state_lock);
     4145 +        mutex_exit(&nsrv4->state_lock);
4011 4146  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX