Print this page
NEX-20260 NFS hung in transitional state when RSF marks it maintenance
NEX-20423 NFSv4 state database entry locking is not always used around reference count.
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-19996 exi_id_get_next() calls should be WRITER locked
NEX-20014 NFS v4 state lock mutex exited before entered (on error path)
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
NEX-16452 NFS server in a zone state database needs to be per zone
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */

  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 




  26 #include <sys/systm.h>
  27 #include <sys/kmem.h>
  28 #include <sys/cmn_err.h>
  29 #include <sys/atomic.h>
  30 #include <sys/clconf.h>
  31 #include <sys/cladm.h>
  32 #include <sys/flock.h>
  33 #include <nfs/export.h>
  34 #include <nfs/nfs.h>
  35 #include <nfs/nfs4.h>
  36 #include <nfs/nfssys.h>
  37 #include <nfs/lm.h>
  38 #include <sys/pathname.h>
  39 #include <sys/sdt.h>
  40 #include <sys/nvpair.h>
  41 
  42 extern u_longlong_t nfs4_srv_caller_id;
  43 
  44 extern time_t rfs4_start_time;
  45 extern uint_t nfs4_srv_vkey;
  46 
  47 stateid4 special0 = {
  48         0,
  49         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
  50 };
  51 
  52 stateid4 special1 = {
  53         0xffffffff,
  54         {
  55                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  56                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  57                 (char)0xff, (char)0xff, (char)0xff, (char)0xff
  58         }
  59 };
  60 
  61 
  62 #define ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
  63                         stateid4_cmp(id, &special1))
  64 
  65 /* For embedding the cluster nodeid into our clientid */
  66 #define CLUSTER_NODEID_SHIFT    24
  67 #define CLUSTER_MAX_NODEID      255
  68 
  69 #ifdef DEBUG
  70 int rfs4_debug;
  71 #endif
  72 
  73 static uint32_t rfs4_database_debug = 0x00;
  74 
  75 static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf);

  76 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
  77 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
  78 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
  79 
  80 /*
  81  * Couple of simple init/destroy functions for a general waiter
  82  */
  83 void
  84 rfs4_sw_init(rfs4_state_wait_t *swp)
  85 {
  86         mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
  87         cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
  88         swp->sw_active = FALSE;
  89         swp->sw_wait_count = 0;
  90 }
  91 
  92 void
  93 rfs4_sw_destroy(rfs4_state_wait_t *swp)
  94 {
  95         mutex_destroy(swp->sw_cv_lock);


 104                 swp->sw_wait_count++;
 105                 cv_wait(swp->sw_cv, swp->sw_cv_lock);
 106                 swp->sw_wait_count--;
 107         }
 108         ASSERT(swp->sw_active == FALSE);
 109         swp->sw_active = TRUE;
 110         mutex_exit(swp->sw_cv_lock);
 111 }
 112 
 113 void
 114 rfs4_sw_exit(rfs4_state_wait_t *swp)
 115 {
 116         mutex_enter(swp->sw_cv_lock);
 117         ASSERT(swp->sw_active == TRUE);
 118         swp->sw_active = FALSE;
 119         if (swp->sw_wait_count != 0)
 120                 cv_broadcast(swp->sw_cv);
 121         mutex_exit(swp->sw_cv_lock);
 122 }
 123 
 124 /*
 125  * CPR callback id -- not related to v4 callbacks
 126  */
 127 static callb_id_t cpr_id = 0;
 128 
 129 static void
 130 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
 131 {
 132         lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
 133         lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
 134 
 135         if (sres->status == NFS4ERR_DENIED) {
 136                 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
 137                 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
 138         }
 139 }
 140 





 141 static void
 142 deep_lock_free(LOCK4res *res)
 143 {
 144         lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
 145 
 146         if (res->status == NFS4ERR_DENIED)
 147                 kmem_free(lo->owner_val, lo->owner_len);
 148 }
 149 
 150 static void
 151 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
 152 {
 153         nfsace4 *sacep, *dacep;
 154 
 155         if (sres->status != NFS4_OK) {
 156                 return;
 157         }
 158 
 159         dres->attrset = sres->attrset;
 160 


 256 /*
 257  * This code is somewhat prototypical for now. Its purpose currently is to
 258  * implement the interfaces sufficiently to finish the higher protocol
 259  * elements. This will be replaced by dynamically resizable tables
 260  * backed by kmem_cache allocator. However synchronization is handled
 261  * correctly (I hope) and will not change by much.  The mutexes for
 262  * the hash buckets that can be used to create new instances of data
 263  * structures  might be good candidates to evolve into reader writer
 264  * locks. If it has to do a creation, it would be holding the
 265  * mutex across a kmem_alloc with KM_SLEEP specified.
 266  */
 267 
 268 #ifdef DEBUG
 269 #define TABSIZE 17
 270 #else
 271 #define TABSIZE 2047
 272 #endif
 273 
 274 #define ADDRHASH(key) ((unsigned long)(key) >> 3)
 275 
 276 /* Used to serialize create/destroy of rfs4_server_state database */
 277 kmutex_t        rfs4_state_lock;
 278 static rfs4_database_t *rfs4_server_state = NULL;
 279 
 280 /* Used to serialize lookups of clientids */
 281 static  krwlock_t       rfs4_findclient_lock;
 282 
 283 /*
 284  * For now this "table" is exposed so that the CPR callback
 285  * function can tromp through it..
 286  */
 287 rfs4_table_t *rfs4_client_tab;
 288 
 289 static rfs4_index_t *rfs4_clientid_idx;
 290 static rfs4_index_t *rfs4_nfsclnt_idx;
 291 static rfs4_table_t *rfs4_clntip_tab;
 292 static rfs4_index_t *rfs4_clntip_idx;
 293 static rfs4_table_t *rfs4_openowner_tab;
 294 static rfs4_index_t *rfs4_openowner_idx;
 295 static rfs4_table_t *rfs4_state_tab;
 296 static rfs4_index_t *rfs4_state_idx;
 297 static rfs4_index_t *rfs4_state_owner_file_idx;
 298 static rfs4_index_t *rfs4_state_file_idx;
 299 static rfs4_table_t *rfs4_lo_state_tab;
 300 static rfs4_index_t *rfs4_lo_state_idx;
 301 static rfs4_index_t *rfs4_lo_state_owner_idx;
 302 static rfs4_table_t *rfs4_lockowner_tab;
 303 static rfs4_index_t *rfs4_lockowner_idx;
 304 static rfs4_index_t *rfs4_lockowner_pid_idx;
 305 static rfs4_table_t *rfs4_file_tab;
 306 static rfs4_index_t *rfs4_file_idx;
 307 static rfs4_table_t *rfs4_deleg_state_tab;
 308 static rfs4_index_t *rfs4_deleg_idx;
 309 static rfs4_index_t *rfs4_deleg_state_idx;
 310 
 311 #define MAXTABSZ 1024*1024
 312 
 313 /* The values below are rfs4_lease_time units */
 314 
 315 #ifdef DEBUG
 316 #define CLIENT_CACHE_TIME 1
 317 #define OPENOWNER_CACHE_TIME 1
 318 #define STATE_CACHE_TIME 1
 319 #define LO_STATE_CACHE_TIME 1
 320 #define LOCKOWNER_CACHE_TIME 1
 321 #define FILE_CACHE_TIME 3
 322 #define DELEG_STATE_CACHE_TIME 1
 323 #else
 324 #define CLIENT_CACHE_TIME 10
 325 #define OPENOWNER_CACHE_TIME 5
 326 #define STATE_CACHE_TIME 1
 327 #define LO_STATE_CACHE_TIME 1
 328 #define LOCKOWNER_CACHE_TIME 3
 329 #define FILE_CACHE_TIME 40
 330 #define DELEG_STATE_CACHE_TIME 1
 331 #endif
 332 















 333 
 334 static time_t rfs4_client_cache_time = 0;
 335 static time_t rfs4_clntip_cache_time = 0;
 336 static time_t rfs4_openowner_cache_time = 0;
 337 static time_t rfs4_state_cache_time = 0;
 338 static time_t rfs4_lo_state_cache_time = 0;
 339 static time_t rfs4_lockowner_cache_time = 0;
 340 static time_t rfs4_file_cache_time = 0;
 341 static time_t rfs4_deleg_state_cache_time = 0;
 342 
 343 static bool_t rfs4_client_create(rfs4_entry_t, void *);
 344 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
 345 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
 346 static void rfs4_client_destroy(rfs4_entry_t);
 347 static bool_t rfs4_client_expiry(rfs4_entry_t);
 348 static uint32_t clientid_hash(void *);
 349 static bool_t clientid_compare(rfs4_entry_t, void *);
 350 static void *clientid_mkkey(rfs4_entry_t);
 351 static uint32_t nfsclnt_hash(void *);
 352 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
 353 static void *nfsclnt_mkkey(rfs4_entry_t);
 354 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
 355 static void rfs4_clntip_destroy(rfs4_entry_t);
 356 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
 357 static uint32_t clntip_hash(void *);
 358 static bool_t clntip_compare(rfs4_entry_t, void *);
 359 static void *clntip_mkkey(rfs4_entry_t);
 360 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
 361 static void rfs4_openowner_destroy(rfs4_entry_t);
 362 static bool_t rfs4_openowner_expiry(rfs4_entry_t);


 688                                         cl_ss->ss_pn = rfs4_ss_movestate(
 689                                             statedir, destdir, dep->d_name);
 690                                 } else {
 691                                         cl_ss->ss_pn = ss_pn;
 692                                 }
 693                                 insque(cl_ss, oldstate);
 694                         } else {
 695                                 rfs4_ss_pnfree(ss_pn);
 696                         }
 697                 }
 698         }
 699 
 700 out:
 701         (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
 702         VN_RELE(dvp);
 703         if (dirt)
 704                 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
 705 }
 706 
 707 static void
 708 rfs4_ss_init(void)
 709 {
 710         int npaths = 1;
 711         char *default_dss_path = NFS4_DSS_VAR_DIR;
 712 
 713         /* read the default stable storage state */
 714         rfs4_dss_readstate(npaths, &default_dss_path);
 715 
 716         rfs4_ss_enabled = 1;
 717 }
 718 
 719 static void
 720 rfs4_ss_fini(void)
 721 {
 722         rfs4_servinst_t *sip;
 723 
 724         mutex_enter(&rfs4_servinst_lock);
 725         sip = rfs4_cur_servinst;
 726         while (sip != NULL) {
 727                 rfs4_dss_clear_oldstate(sip);
 728                 sip = sip->next;
 729         }
 730         mutex_exit(&rfs4_servinst_lock);
 731 }
 732 
 733 /*
 734  * Remove all oldstate files referenced by this servinst.
 735  */
 736 static void
 737 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
 738 {
 739         rfs4_oldstate_t *os_head, *osp;
 740 
 741         rw_enter(&sip->oldstate_lock, RW_WRITER);
 742         os_head = sip->oldstate;
 743 
 744         if (os_head == NULL) {
 745                 rw_exit(&sip->oldstate_lock);
 746                 return;
 747         }
 748 
 749         /* skip dummy entry */
 750         osp = os_head->next;


 754 
 755                 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
 756 
 757                 if (osp->cl_id4.id_val)
 758                         kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
 759                 rfs4_ss_pnfree(osp->ss_pn);
 760 
 761                 os_next = osp->next;
 762                 remque(osp);
 763                 kmem_free(osp, sizeof (rfs4_oldstate_t));
 764                 osp = os_next;
 765         }
 766 
 767         rw_exit(&sip->oldstate_lock);
 768 }
 769 
 770 /*
 771  * Form the state and oldstate paths, and read in the stable storage files.
 772  */
 773 void
 774 rfs4_dss_readstate(int npaths, char **paths)
 775 {
 776         int i;
 777         char *state, *oldstate;
 778 
 779         state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 780         oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 781 
 782         for (i = 0; i < npaths; i++) {
 783                 char *path = paths[i];
 784 
 785                 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
 786                 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
 787 
 788                 /*
 789                  * Populate the current server instance's oldstate list.
 790                  *
 791                  * 1. Read stable storage data from old state directory,
 792                  *    leaving its contents alone.
 793                  *
 794                  * 2. Read stable storage data from state directory,
 795                  *    and move the latter's contents to old state
 796                  *    directory.
 797                  */
 798                 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL);
 799                 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate);


 800         }
 801 
 802         kmem_free(state, MAXPATHLEN);
 803         kmem_free(oldstate, MAXPATHLEN);
 804 }
 805 
 806 
 807 /*
 808  * Check if we are still in grace and if the client can be
 809  * granted permission to perform reclaims.
 810  */
 811 void
 812 rfs4_ss_chkclid(rfs4_client_t *cp)
 813 {
 814         rfs4_servinst_t *sip;
 815 
 816         /*
 817          * It should be sufficient to check the oldstate data for just
 818          * this client's instance. However, since our per-instance
 819          * client grouping is solely temporal, HA-NFSv4 RG failover
 820          * might result in clients of the same RG being partitioned into
 821          * separate instances.
 822          *
 823          * Until the client grouping is improved, we must check the
 824          * oldstate data for all instances with an active grace period.
 825          *
 826          * This also serves as the mechanism to remove stale oldstate data.
 827          * The first time we check an instance after its grace period has
 828          * expired, the oldstate data should be cleared.
 829          *
 830          * Start at the current instance, and walk the list backwards
 831          * to the first.
 832          */
 833         mutex_enter(&rfs4_servinst_lock);
 834         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 835                 rfs4_ss_chkclid_sip(cp, sip);
 836 
 837                 /* if the above check found this client, we're done */
 838                 if (cp->rc_can_reclaim)
 839                         break;
 840         }
 841         mutex_exit(&rfs4_servinst_lock);
 842 }
 843 
 844 static void
 845 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
 846 {
 847         rfs4_oldstate_t *osp, *os_head;
 848 
 849         /* short circuit everything if this server instance has no oldstate */
 850         rw_enter(&sip->oldstate_lock, RW_READER);
 851         os_head = sip->oldstate;
 852         rw_exit(&sip->oldstate_lock);
 853         if (os_head == NULL)
 854                 return;
 855 
 856         /*
 857          * If this server instance is no longer in a grace period then
 858          * the client won't be able to reclaim. No further need for this
 859          * instance's oldstate data, so it can be cleared.
 860          */
 861         if (!rfs4_servinst_in_grace(sip))


 871         while (osp != os_head) {
 872                 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
 873                         if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
 874                             osp->cl_id4.id_len) == 0) {
 875                                 cp->rc_can_reclaim = 1;
 876                                 break;
 877                         }
 878                 }
 879                 osp = osp->next;
 880         }
 881 
 882         rw_exit(&sip->oldstate_lock);
 883 }
 884 
 885 /*
 886  * Place client information into stable storage: 1/3.
 887  * First, generate the leaf filename, from the client's IP address and
 888  * the server-generated short-hand clientid.
 889  */
 890 void
 891 rfs4_ss_clid(rfs4_client_t *cp)
 892 {
 893         const char *kinet_ntop6(uchar_t *, char *, size_t);
 894         char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
 895         struct sockaddr *ca;
 896         uchar_t *b;
 897 
 898         if (rfs4_ss_enabled == 0) {
 899                 return;
 900         }
 901 
 902         buf[0] = 0;
 903 
 904         ca = (struct sockaddr *)&cp->rc_addr;
 905 
 906         /*
 907          * Convert the caller's IP address to a dotted string
 908          */
 909         if (ca->sa_family == AF_INET) {
 910                 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
 911                 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
 912                     b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
 913         } else if (ca->sa_family == AF_INET6) {
 914                 struct sockaddr_in6 *sin6;
 915 
 916                 sin6 = (struct sockaddr_in6 *)ca;
 917                 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
 918                     buf, INET6_ADDRSTRLEN);
 919         }
 920 
 921         (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
 922             (longlong_t)cp->rc_clientid);
 923         rfs4_ss_clid_write(cp, leaf);
 924 }
 925 
 926 /*
 927  * Place client information into stable storage: 2/3.
 928  * DSS: distributed stable storage: the file may need to be written to
 929  * multiple directories.
 930  */
 931 static void
 932 rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
 933 {
 934         rfs4_servinst_t *sip;
 935 
 936         /*
 937          * It should be sufficient to write the leaf file to (all) DSS paths
 938          * associated with just this client's instance. However, since our
 939          * per-instance client grouping is solely temporal, HA-NFSv4 RG
 940          * failover might result in us losing DSS data.
 941          *
 942          * Until the client grouping is improved, we must write the DSS data
 943          * to all instances' paths. Start at the current instance, and
 944          * walk the list backwards to the first.
 945          */
 946         mutex_enter(&rfs4_servinst_lock);
 947         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 948                 int i, npaths = sip->dss_npaths;
 949 
 950                 /* write the leaf file to all DSS paths */
 951                 for (i = 0; i < npaths; i++) {
 952                         rfs4_dss_path_t *dss_path = sip->dss_paths[i];
 953 
 954                         /* HA-NFSv4 path might have been failed-away from us */
 955                         if (dss_path == NULL)
 956                                 continue;
 957 
 958                         rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
 959                 }
 960         }
 961         mutex_exit(&rfs4_servinst_lock);
 962 }
 963 
 964 /*
 965  * Place client information into stable storage: 3/3.
 966  * Write the stable storage data to the requested file.
 967  */
 968 static void
 969 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
 970 {
 971         int ioflag;
 972         int file_vers = NFS4_SS_VERSION;
 973         size_t dirlen;
 974         struct uio uio;
 975         struct iovec iov[4];
 976         char *dir;
 977         rfs4_ss_pn_t *ss_pn;
 978         vnode_t *vp;
 979         nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
 980 
 981         /* allow 2 extra bytes for '/' & NUL */


1134                  * for forced expiration
1135                  */
1136                 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1137                         cp->rc_forced_expire = 1;
1138                 }
1139                 break;
1140 
1141         default:
1142                 /* force this assert to fail */
1143                 ASSERT(clr->addr_type != clr->addr_type);
1144         }
1145 }
1146 
1147 /*
1148  * This is called from nfssys() in order to clear server state
1149  * for the specified client IP Address.
1150  */
1151 void
1152 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1153 {
1154         (void) rfs4_dbe_walk(rfs4_client_tab, rfs4_client_scrub, clr);


1155 }
1156 
1157 /*
1158  * Used to initialize the NFSv4 server's state or database.  All of
1159  * the tables are created and timers are set. Only called when NFSv4
1160  * service is provided.
1161  */
1162 void
1163 rfs4_state_init()
1164 {
1165         int start_grace;
1166         extern boolean_t rfs4_cpr_callb(void *, int);
1167         char *dss_path = NFS4_DSS_VAR_DIR;
1168         time_t start_time;




1169 
1170         mutex_enter(&rfs4_state_lock);

























1171 












1172         /*
1173          * If the server state database has already been initialized,
1174          * skip it
1175          */
1176         if (rfs4_server_state != NULL) {
1177                 mutex_exit(&rfs4_state_lock);
1178                 return;






1179         }
1180 
1181         rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);







1182 



















1183         /*
1184          * Set the boot time.  If the server
1185          * has been restarted quickly and has had the opportunity to
1186          * service clients, then the start_time needs to be bumped
1187          * regardless.  A small window but it exists...
1188          */
1189         start_time = gethrestime_sec();
1190         if (rfs4_start_time < start_time)
1191                 rfs4_start_time = start_time;
1192         else
1193                 rfs4_start_time++;
1194 
1195         /* DSS: distributed stable storage: initialise served paths list */
1196         rfs4_dss_pathlist = NULL;
1197 
1198         /*
1199          * Create the first server instance, or a new one if the server has
1200          * been restarted; see above comments on rfs4_start_time. Don't
1201          * start its grace period; that will be done later, to maximise the
1202          * clients' recovery window.
1203          */
1204         start_grace = 0;
1205         rfs4_servinst_create(start_grace, 1, &dss_path);
1206 
1207         /* reset the "first NFSv4 request" status */
1208         rfs4_seen_first_compound = 0;
1209 


1210         /*
1211          * Add a CPR callback so that we can update client
1212          * access times to extend the lease after a suspend
1213          * and resume (using the same class as rpcmod/connmgr)
1214          */
1215         cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");



1216 


1217         /* set the various cache timers for table creation */
1218         if (rfs4_client_cache_time == 0)
1219                 rfs4_client_cache_time = CLIENT_CACHE_TIME;
1220         if (rfs4_openowner_cache_time == 0)
1221                 rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1222         if (rfs4_state_cache_time == 0)
1223                 rfs4_state_cache_time = STATE_CACHE_TIME;
1224         if (rfs4_lo_state_cache_time == 0)
1225                 rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1226         if (rfs4_lockowner_cache_time == 0)
1227                 rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1228         if (rfs4_file_cache_time == 0)
1229                 rfs4_file_cache_time = FILE_CACHE_TIME;
1230         if (rfs4_deleg_state_cache_time == 0)
1231                 rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1232 
1233         /* Create the overall database to hold all server state */
1234         rfs4_server_state = rfs4_database_create(rfs4_database_debug);
1235 
1236         /* Now create the individual tables */
1237         rfs4_client_cache_time *= rfs4_lease_time;
1238         rfs4_client_tab = rfs4_table_create(rfs4_server_state,
1239             "Client",
1240             rfs4_client_cache_time,
1241             2,
1242             rfs4_client_create,
1243             rfs4_client_destroy,
1244             rfs4_client_expiry,
1245             sizeof (rfs4_client_t),
1246             TABSIZE,
1247             MAXTABSZ/8, 100);
1248         rfs4_nfsclnt_idx = rfs4_index_create(rfs4_client_tab,
1249             "nfs_client_id4", nfsclnt_hash,
1250             nfsclnt_compare, nfsclnt_mkkey,
1251             TRUE);
1252         rfs4_clientid_idx = rfs4_index_create(rfs4_client_tab,
1253             "client_id", clientid_hash,
1254             clientid_compare, clientid_mkkey,
1255             FALSE);
1256 
1257         rfs4_clntip_cache_time = 86400 * 365;   /* about a year */
1258         rfs4_clntip_tab = rfs4_table_create(rfs4_server_state,
1259             "ClntIP",
1260             rfs4_clntip_cache_time,
1261             1,
1262             rfs4_clntip_create,
1263             rfs4_clntip_destroy,
1264             rfs4_clntip_expiry,
1265             sizeof (rfs4_clntip_t),
1266             TABSIZE,
1267             MAXTABSZ, 100);
1268         rfs4_clntip_idx = rfs4_index_create(rfs4_clntip_tab,
1269             "client_ip", clntip_hash,
1270             clntip_compare, clntip_mkkey,
1271             TRUE);
1272 
1273         rfs4_openowner_cache_time *= rfs4_lease_time;
1274         rfs4_openowner_tab = rfs4_table_create(rfs4_server_state,
1275             "OpenOwner",
1276             rfs4_openowner_cache_time,
1277             1,
1278             rfs4_openowner_create,
1279             rfs4_openowner_destroy,
1280             rfs4_openowner_expiry,
1281             sizeof (rfs4_openowner_t),
1282             TABSIZE,
1283             MAXTABSZ, 100);
1284         rfs4_openowner_idx = rfs4_index_create(rfs4_openowner_tab,
1285             "open_owner4", openowner_hash,
1286             openowner_compare,
1287             openowner_mkkey, TRUE);
1288 
1289         rfs4_state_cache_time *= rfs4_lease_time;
1290         rfs4_state_tab = rfs4_table_create(rfs4_server_state,
1291             "OpenStateID",
1292             rfs4_state_cache_time,
1293             3,
1294             rfs4_state_create,
1295             rfs4_state_destroy,
1296             rfs4_state_expiry,
1297             sizeof (rfs4_state_t),
1298             TABSIZE,
1299             MAXTABSZ, 100);
1300 
1301         rfs4_state_owner_file_idx = rfs4_index_create(rfs4_state_tab,

1302             "Openowner-File",
1303             state_owner_file_hash,
1304             state_owner_file_compare,
1305             state_owner_file_mkkey, TRUE);
1306 
1307         rfs4_state_idx = rfs4_index_create(rfs4_state_tab,
1308             "State-id", state_hash,
1309             state_compare, state_mkkey, FALSE);
1310 
1311         rfs4_state_file_idx = rfs4_index_create(rfs4_state_tab,
1312             "File", state_file_hash,
1313             state_file_compare, state_file_mkkey,
1314             FALSE);
1315 
1316         rfs4_lo_state_cache_time *= rfs4_lease_time;
1317         rfs4_lo_state_tab = rfs4_table_create(rfs4_server_state,
1318             "LockStateID",
1319             rfs4_lo_state_cache_time,
1320             2,
1321             rfs4_lo_state_create,
1322             rfs4_lo_state_destroy,
1323             rfs4_lo_state_expiry,
1324             sizeof (rfs4_lo_state_t),
1325             TABSIZE,
1326             MAXTABSZ, 100);
1327 
1328         rfs4_lo_state_owner_idx = rfs4_index_create(rfs4_lo_state_tab,

1329             "lockownerxstate",
1330             lo_state_lo_hash,
1331             lo_state_lo_compare,
1332             lo_state_lo_mkkey, TRUE);
1333 
1334         rfs4_lo_state_idx = rfs4_index_create(rfs4_lo_state_tab,
1335             "State-id",
1336             lo_state_hash, lo_state_compare,
1337             lo_state_mkkey, FALSE);
1338 
1339         rfs4_lockowner_cache_time *= rfs4_lease_time;
1340 
1341         rfs4_lockowner_tab = rfs4_table_create(rfs4_server_state,
1342             "Lockowner",
1343             rfs4_lockowner_cache_time,
1344             2,
1345             rfs4_lockowner_create,
1346             rfs4_lockowner_destroy,
1347             rfs4_lockowner_expiry,
1348             sizeof (rfs4_lockowner_t),
1349             TABSIZE,
1350             MAXTABSZ, 100);
1351 
1352         rfs4_lockowner_idx = rfs4_index_create(rfs4_lockowner_tab,
1353             "lock_owner4", lockowner_hash,
1354             lockowner_compare,
1355             lockowner_mkkey, TRUE);
1356 
1357         rfs4_lockowner_pid_idx = rfs4_index_create(rfs4_lockowner_tab,

1358             "pid", pid_hash,
1359             pid_compare, pid_mkkey,
1360             FALSE);
1361 
1362         rfs4_file_cache_time *= rfs4_lease_time;
1363         rfs4_file_tab = rfs4_table_create(rfs4_server_state,
1364             "File",
1365             rfs4_file_cache_time,
1366             1,
1367             rfs4_file_create,
1368             rfs4_file_destroy,
1369             NULL,
1370             sizeof (rfs4_file_t),
1371             TABSIZE,
1372             MAXTABSZ, -1);
1373 
1374         rfs4_file_idx = rfs4_index_create(rfs4_file_tab,
1375             "Filehandle", file_hash,
1376             file_compare, file_mkkey, TRUE);
1377 
1378         rfs4_deleg_state_cache_time *= rfs4_lease_time;
1379         rfs4_deleg_state_tab = rfs4_table_create(rfs4_server_state,

1380             "DelegStateID",
1381             rfs4_deleg_state_cache_time,
1382             2,
1383             rfs4_deleg_state_create,
1384             rfs4_deleg_state_destroy,
1385             rfs4_deleg_state_expiry,
1386             sizeof (rfs4_deleg_state_t),
1387             TABSIZE,
1388             MAXTABSZ, 100);
1389         rfs4_deleg_idx = rfs4_index_create(rfs4_deleg_state_tab,
1390             "DelegByFileClient",
1391             deleg_hash,
1392             deleg_compare,
1393             deleg_mkkey, TRUE);
1394 
1395         rfs4_deleg_state_idx = rfs4_index_create(rfs4_deleg_state_tab,

1396             "DelegState",
1397             deleg_state_hash,
1398             deleg_state_compare,
1399             deleg_state_mkkey, FALSE);
1400 


1401         /*
1402          * Init the stable storage.
1403          */
1404         rfs4_ss_init();
1405 
1406         rfs4_client_clrst = rfs4_clear_client_state;
1407 
1408         mutex_exit(&rfs4_state_lock);
1409 }
1410 
1411 
1412 /*
1413  * Used at server shutdown to cleanup all of the NFSv4 server's structures
1414  * and other state.
1415  */
1416 void
1417 rfs4_state_fini()
1418 {
1419         rfs4_database_t *dbp;


1420 
1421         mutex_enter(&rfs4_state_lock);
1422 
1423         if (rfs4_server_state == NULL) {
1424                 mutex_exit(&rfs4_state_lock);


1425                 return;
1426         }
1427 
1428         rfs4_client_clrst = NULL;

1429 
1430         rfs4_set_deleg_policy(SRV_NEVER_DELEGATE);
1431         dbp = rfs4_server_state;
1432         rfs4_server_state = NULL;
1433 
1434         /*
1435          * Cleanup the CPR callback.
1436          */
1437         if (cpr_id)
1438                 (void) callb_delete(cpr_id);
1439 
1440         rw_destroy(&rfs4_findclient_lock);
1441 
1442         /* First stop all of the reaper threads in the database */
1443         rfs4_database_shutdown(dbp);
1444         /* clean up any dangling stable storage structures */
1445         rfs4_ss_fini();
1446         /* Now actually destroy/release the database and its tables */
1447         rfs4_database_destroy(dbp);



1448 
1449         /* Reset the cache timers for next time */
1450         rfs4_client_cache_time = 0;
1451         rfs4_openowner_cache_time = 0;
1452         rfs4_state_cache_time = 0;
1453         rfs4_lo_state_cache_time = 0;
1454         rfs4_lockowner_cache_time = 0;
1455         rfs4_file_cache_time = 0;
1456         rfs4_deleg_state_cache_time = 0;
1457 
1458         mutex_exit(&rfs4_state_lock);
1459 
1460         /* destroy server instances and current instance ptr */
1461         rfs4_servinst_destroy_all();
1462 
1463         /* reset the "first NFSv4 request" status */
1464         rfs4_seen_first_compound = 0;
1465 
1466         /* DSS: distributed stable storage */
1467         nvlist_free(rfs4_dss_oldpaths);
1468         nvlist_free(rfs4_dss_paths);
1469         rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1470 }
1471 
1472 typedef union {
1473         struct {
1474                 uint32_t start_time;
1475                 uint32_t c_id;
1476         } impl_id;
1477         clientid4 id4;
1478 } cid;
1479 
1480 static int foreign_stateid(stateid_t *id);
1481 static int foreign_clientid(cid *cidp);
1482 static void embed_nodeid(cid *cidp);
1483 
1484 typedef union {
1485         struct {
1486                 uint32_t c_id;
1487                 uint32_t gen_num;
1488         } cv_impl;
1489         verifier4       confirm_verf;


1564          * If the sysadmin has used clear_locks for this
1565          * entry then forced_expire will be set and we
1566          * want this entry to be reaped. Or the entry
1567          * has exceeded its lease period.
1568          */
1569         cp_expired = (cp->rc_forced_expire ||
1570             (gethrestime_sec() - cp->rc_last_access
1571             > rfs4_lease_time));
1572 
1573         if (!cp->rc_ss_remove && cp_expired)
1574                 cp->rc_ss_remove = 1;
1575         return (cp_expired);
1576 }
1577 
1578 /*
1579  * Remove the leaf file from all distributed stable storage paths.
1580  */
1581 static void
1582 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1583 {

1584         rfs4_servinst_t *sip;
1585         char *leaf = cp->rc_ss_pn->leaf;
1586 
1587         /*
1588          * since the state files are written to all DSS
1589          * paths we must remove this leaf file instance
1590          * from all server instances.
1591          */
1592 
1593         mutex_enter(&rfs4_servinst_lock);
1594         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {

1595                 /* remove the leaf file associated with this server instance */
1596                 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1597         }
1598         mutex_exit(&rfs4_servinst_lock);
1599 }
1600 
1601 static void
1602 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1603 {
1604         int i, npaths = sip->dss_npaths;
1605 
1606         for (i = 0; i < npaths; i++) {
1607                 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1608                 char *path, *dir;
1609                 size_t pathlen;
1610 
1611                 /* the HA-NFSv4 path might have been failed-over away from us */
1612                 if (dss_path == NULL)
1613                         continue;
1614 
1615                 dir = dss_path->path;
1616 
1617                 /* allow 3 extra bytes for two '/' & a NUL */
1618                 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;


1646                 if (cp->rc_ss_remove)
1647                         rfs4_dss_remove_cpleaf(cp);
1648                 rfs4_ss_pnfree(cp->rc_ss_pn);
1649         }
1650 
1651         /* Free the client supplied client id */
1652         kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1653 
1654         if (cp->rc_sysidt != LM_NOSYSID)
1655                 lm_free_sysidt(cp->rc_sysidt);
1656 }
1657 
1658 static bool_t
1659 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1660 {
1661         rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1662         nfs_client_id4 *client = (nfs_client_id4 *)arg;
1663         struct sockaddr *ca;
1664         cid *cidp;
1665         scid_confirm_verf *scvp;

1666 


1667         /* Get a clientid to give to the client */
1668         cidp = (cid *)&cp->rc_clientid;
1669         cidp->impl_id.start_time = rfs4_start_time;
1670         cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1671 
1672         /* If we are booted as a cluster node, embed our nodeid */
1673         if (cluster_bootflags & CLUSTER_BOOTED)
1674                 embed_nodeid(cidp);
1675 
1676         /* Allocate and copy client's client id value */
1677         cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1678         cp->rc_nfs_client.id_len = client->id_len;
1679         bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1680         cp->rc_nfs_client.verifier = client->verifier;
1681 
1682         /* Copy client's IP address */
1683         ca = client->cl_addr;
1684         if (ca->sa_family == AF_INET)
1685                 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1686         else if (ca->sa_family == AF_INET6)
1687                 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1688         cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1689 


1707 
1708         cp->rc_cr_set = NULL;
1709 
1710         cp->rc_sysidt = LM_NOSYSID;
1711 
1712         list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1713             offsetof(rfs4_openowner_t, ro_node));
1714 
1715         /* set up the callback control structure */
1716         cp->rc_cbinfo.cb_state = CB_UNINIT;
1717         mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1718         cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1719         cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1720 
1721         /*
1722          * Associate the client_t with the current server instance.
1723          * The hold is solely to satisfy the calling requirement of
1724          * rfs4_servinst_assign(). In this case it's not strictly necessary.
1725          */
1726         rfs4_dbe_hold(cp->rc_dbe);
1727         rfs4_servinst_assign(cp, rfs4_cur_servinst);
1728         rfs4_dbe_rele(cp->rc_dbe);
1729 
1730         return (TRUE);
1731 }
1732 
1733 /*
1734  * Caller wants to generate/update the setclientid_confirm verifier
1735  * associated with a client.  This is done during the SETCLIENTID
1736  * processing.
1737  */
1738 void
1739 rfs4_client_scv_next(rfs4_client_t *cp)
1740 {
1741         scid_confirm_verf *scvp;
1742 
1743         /* Init the value for the SETCLIENTID_CONFIRM verifier */
1744         scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1745         scvp->cv_impl.gen_num++;
1746 }
1747 
1748 void
1749 rfs4_client_rele(rfs4_client_t *cp)
1750 {
1751         rfs4_dbe_rele(cp->rc_dbe);
1752 }
1753 
1754 rfs4_client_t *
1755 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1756 {
1757         rfs4_client_t *cp;


1758 
1759 
1760         if (oldcp) {
1761                 rw_enter(&rfs4_findclient_lock, RW_WRITER);
1762                 rfs4_dbe_hide(oldcp->rc_dbe);
1763         } else {
1764                 rw_enter(&rfs4_findclient_lock, RW_READER);
1765         }
1766 
1767         cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_nfsclnt_idx, client,
1768             create, (void *)client, RFS4_DBS_VALID);
1769 
1770         if (oldcp)
1771                 rfs4_dbe_unhide(oldcp->rc_dbe);
1772 
1773         rw_exit(&rfs4_findclient_lock);
1774 
1775         return (cp);
1776 }
1777 
1778 rfs4_client_t *
1779 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1780 {
1781         rfs4_client_t *cp;
1782         bool_t create = FALSE;
1783         cid *cidp = (cid *)&clientid;

1784 
1785         /* If we're a cluster and the nodeid isn't right, short-circuit */
1786         if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1787                 return (NULL);
1788 
1789         rw_enter(&rfs4_findclient_lock, RW_READER);
1790 
1791         cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, &clientid,
1792             &create, NULL, RFS4_DBS_VALID);
1793 
1794         rw_exit(&rfs4_findclient_lock);
1795 
1796         if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1797                 rfs4_client_rele(cp);
1798                 return (NULL);
1799         } else {
1800                 return (cp);
1801         }
1802 }
1803 
1804 static uint32_t
1805 clntip_hash(void *key)
1806 {
1807         struct sockaddr *addr = key;
1808         int i, len = 0;
1809         uint32_t hash = 0;
1810         char *ptr;
1811 
1812         if (addr->sa_family == AF_INET) {
1813                 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1814                 len = sizeof (struct in_addr);


1882 {
1883         rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1884         struct sockaddr *ca = (struct sockaddr *)arg;
1885 
1886         /* Copy client's IP address */
1887         if (ca->sa_family == AF_INET)
1888                 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1889         else if (ca->sa_family == AF_INET6)
1890                 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1891         else
1892                 return (FALSE);
1893         cp->ri_no_referrals = 1;
1894 
1895         return (TRUE);
1896 }
1897 
1898 rfs4_clntip_t *
1899 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1900 {
1901         rfs4_clntip_t *cp;

1902 
1903         rw_enter(&rfs4_findclient_lock, RW_READER);
1904 
1905         cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,


1906             create, addr, RFS4_DBS_VALID);
1907 
1908         rw_exit(&rfs4_findclient_lock);
1909 
1910         return (cp);
1911 }
1912 
1913 void
1914 rfs4_invalidate_clntip(struct sockaddr *addr)
1915 {
1916         rfs4_clntip_t *cp;
1917         bool_t create = FALSE;

1918 
1919         rw_enter(&rfs4_findclient_lock, RW_READER);
1920 
1921         cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
1922             &create, NULL, RFS4_DBS_VALID);
1923         if (cp == NULL) {
1924                 rw_exit(&rfs4_findclient_lock);
1925                 return;
1926         }
1927         rfs4_dbe_invalidate(cp->ri_dbe);
1928         rfs4_dbe_rele(cp->ri_dbe);
1929 
1930         rw_exit(&rfs4_findclient_lock);
1931 }
1932 
1933 bool_t
1934 rfs4_lease_expired(rfs4_client_t *cp)
1935 {
1936         bool_t rc;
1937 
1938         rfs4_dbe_lock(cp->rc_dbe);
1939 
1940         /*
1941          * If the admin has executed clear_locks for this
1942          * client id, force expire will be set, so no need
1943          * to calculate anything because it's "outa here".
1944          */
1945         if (cp->rc_forced_expire) {
1946                 rc = TRUE;
1947         } else {
1948                 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
1949         }
1950 


2058 
2059         /* Free the lock owner id */
2060         kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2061 }
2062 
2063 void
2064 rfs4_openowner_rele(rfs4_openowner_t *oo)
2065 {
2066         rfs4_dbe_rele(oo->ro_dbe);
2067 }
2068 
2069 static bool_t
2070 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2071 {
2072         rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2073         rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2074         open_owner4 *openowner = &argp->ro_owner;
2075         seqid4 seqid = argp->ro_open_seqid;
2076         rfs4_client_t *cp;
2077         bool_t create = FALSE;

2078 
2079         rw_enter(&rfs4_findclient_lock, RW_READER);
2080 
2081         cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2082             &openowner->clientid,
2083             &create, NULL, RFS4_DBS_VALID);
2084 
2085         rw_exit(&rfs4_findclient_lock);
2086 
2087         if (cp == NULL)
2088                 return (FALSE);
2089 
2090         oo->ro_reply_fh.nfs_fh4_len = 0;
2091         oo->ro_reply_fh.nfs_fh4_val = NULL;
2092 
2093         oo->ro_owner.clientid = openowner->clientid;
2094         oo->ro_owner.owner_val =
2095             kmem_alloc(openowner->owner_len, KM_SLEEP);
2096 
2097         bcopy(openowner->owner_val,
2098             oo->ro_owner.owner_val, openowner->owner_len);
2099 
2100         oo->ro_owner.owner_len = openowner->owner_len;
2101 
2102         oo->ro_need_confirm = TRUE;
2103 
2104         rfs4_sw_init(&oo->ro_sw);
2105 


2107         bzero(&oo->ro_reply, sizeof (nfs_resop4));
2108         oo->ro_client = cp;
2109         oo->ro_cr_set = NULL;
2110 
2111         list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2112             offsetof(rfs4_state_t, rs_node));
2113 
2114         /* Insert openowner into client's open owner list */
2115         rfs4_dbe_lock(cp->rc_dbe);
2116         list_insert_tail(&cp->rc_openownerlist, oo);
2117         rfs4_dbe_unlock(cp->rc_dbe);
2118 
2119         return (TRUE);
2120 }
2121 
2122 rfs4_openowner_t *
2123 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2124 {
2125         rfs4_openowner_t *oo;
2126         rfs4_openowner_t arg;

2127 
2128         arg.ro_owner = *openowner;
2129         arg.ro_open_seqid = seqid;
2130         oo = (rfs4_openowner_t *)rfs4_dbsearch(rfs4_openowner_idx, openowner,

2131             create, &arg, RFS4_DBS_VALID);
2132 
2133         return (oo);
2134 }
2135 
2136 void
2137 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2138 {
2139 
2140         rfs4_dbe_lock(oo->ro_dbe);
2141 
2142         oo->ro_open_seqid++;
2143 
2144         rfs4_dbe_unlock(oo->ro_dbe);
2145 }
2146 
2147 void
2148 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2149 {
2150 


2253 }
2254 
2255 /* ARGSUSED */
2256 static bool_t
2257 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2258 {
2259         /*
2260          * Since expiry is called with no other references on
2261          * this struct, go ahead and have it removed.
2262          */
2263         return (TRUE);
2264 }
2265 
2266 static bool_t
2267 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2268 {
2269         rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2270         lock_owner4 *lockowner = (lock_owner4 *)arg;
2271         rfs4_client_t *cp;
2272         bool_t create = FALSE;

2273 
2274         rw_enter(&rfs4_findclient_lock, RW_READER);
2275 
2276         cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2277             &lockowner->clientid,
2278             &create, NULL, RFS4_DBS_VALID);
2279 
2280         rw_exit(&rfs4_findclient_lock);
2281 
2282         if (cp == NULL)
2283                 return (FALSE);
2284 
2285         /* Reference client */
2286         lo->rl_client = cp;
2287         lo->rl_owner.clientid = lockowner->clientid;
2288         lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2289         bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2290             lockowner->owner_len);
2291         lo->rl_owner.owner_len = lockowner->owner_len;
2292         lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2293 
2294         return (TRUE);
2295 }
2296 
2297 rfs4_lockowner_t *
2298 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2299 {
2300         rfs4_lockowner_t *lo;

2301 
2302         lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_idx, lockowner,

2303             create, lockowner, RFS4_DBS_VALID);
2304 
2305         return (lo);
2306 }
2307 
2308 rfs4_lockowner_t *
2309 rfs4_findlockowner_by_pid(pid_t pid)
2310 {
2311         rfs4_lockowner_t *lo;
2312         bool_t create = FALSE;

2313 
2314         lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_pid_idx,
2315             (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2316 
2317         return (lo);
2318 }
2319 
2320 
2321 static uint32_t
2322 file_hash(void *key)
2323 {
2324         return (ADDRHASH(key));
2325 }
2326 
2327 static void *
2328 file_mkkey(rfs4_entry_t u_entry)
2329 {
2330         rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2331 
2332         return (fp->rf_vp);
2333 }
2334 


2405 
2406         mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2407         cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2408 
2409         fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2410 
2411         rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2412 
2413         mutex_enter(&vp->v_vsd_lock);
2414         VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2415         mutex_exit(&vp->v_vsd_lock);
2416 
2417         return (TRUE);
2418 }
2419 
2420 rfs4_file_t *
2421 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2422 {
2423         rfs4_file_t *fp;
2424         rfs4_fcreate_arg arg;

2425 
2426         arg.vp = vp;
2427         arg.fh = fh;
2428 
2429         if (*create == TRUE)
2430                 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,

2431                     &arg, RFS4_DBS_VALID);
2432         else {
2433                 mutex_enter(&vp->v_vsd_lock);
2434                 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2435                 if (fp) {
2436                         rfs4_dbe_lock(fp->rf_dbe);
2437                         if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2438                             (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2439                                 rfs4_dbe_unlock(fp->rf_dbe);
2440                                 fp = NULL;
2441                         } else {
2442                                 rfs4_dbe_hold(fp->rf_dbe);
2443                                 rfs4_dbe_unlock(fp->rf_dbe);
2444                         }
2445                 }
2446                 mutex_exit(&vp->v_vsd_lock);
2447         }
2448         return (fp);
2449 }
2450 
2451 /*
2452  * Find a file in the db and once it is located, take the rw lock.
2453  * Need to check the vnode pointer and if it does not exist (it was
2454  * removed between the db location and check) redo the find.  This
2455  * assumes that a file struct that has a NULL vnode pointer is marked
2456  * at 'invalid' and will not be found in the db the second time
2457  * around.
2458  */
2459 rfs4_file_t *
2460 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2461 {
2462         rfs4_file_t *fp;
2463         rfs4_fcreate_arg arg;
2464         bool_t screate = *create;

2465 
2466         if (screate == FALSE) {
2467                 mutex_enter(&vp->v_vsd_lock);
2468                 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2469                 if (fp) {
2470                         rfs4_dbe_lock(fp->rf_dbe);
2471                         if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2472                             (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2473                                 rfs4_dbe_unlock(fp->rf_dbe);
2474                                 mutex_exit(&vp->v_vsd_lock);
2475                                 fp = NULL;
2476                         } else {
2477                                 rfs4_dbe_hold(fp->rf_dbe);
2478                                 rfs4_dbe_unlock(fp->rf_dbe);
2479                                 mutex_exit(&vp->v_vsd_lock);
2480                                 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2481                                 if (fp->rf_vp == NULL) {
2482                                         rw_exit(&fp->rf_file_rwlock);
2483                                         rfs4_file_rele(fp);
2484                                         fp = NULL;
2485                                 }
2486                         }
2487                 } else {
2488                         mutex_exit(&vp->v_vsd_lock);
2489                 }
2490         } else {
2491 retry:
2492                 arg.vp = vp;
2493                 arg.fh = fh;
2494 
2495                 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
2496                     &arg, RFS4_DBS_VALID);
2497                 if (fp != NULL) {
2498                         rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2499                         if (fp->rf_vp == NULL) {
2500                                 rw_exit(&fp->rf_file_rwlock);
2501                                 rfs4_file_rele(fp);
2502                                 *create = screate;
2503                                 goto retry;
2504                         }
2505                 }
2506         }
2507 
2508         return (fp);
2509 }
2510 
2511 static uint32_t
2512 lo_state_hash(void *key)
2513 {
2514         stateid_t *id = key;
2515 
2516         return (id->bits.ident+id->bits.pid);


2631         list_insert_tail(&sp->rs_lostatelist, lsp);
2632         rfs4_dbe_hold(sp->rs_dbe);
2633         rfs4_dbe_unlock(sp->rs_dbe);
2634 
2635         return (TRUE);
2636 }
2637 
2638 void
2639 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2640 {
2641         if (unlock_fp == TRUE)
2642                 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2643         rfs4_dbe_rele(lsp->rls_dbe);
2644 }
2645 
2646 static rfs4_lo_state_t *
2647 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2648 {
2649         rfs4_lo_state_t *lsp;
2650         bool_t create = FALSE;

2651 
2652         lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_idx, id,
2653             &create, NULL, RFS4_DBS_VALID);
2654         if (lock_fp == TRUE && lsp != NULL)
2655                 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2656 
2657         return (lsp);
2658 }
2659 
2660 
2661 static uint32_t
2662 lo_state_lo_hash(void *key)
2663 {
2664         rfs4_lo_state_t *lsp = key;
2665 
2666         return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2667 }
2668 
2669 static bool_t
2670 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2671 {
2672         rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2673         rfs4_lo_state_t *keyp = key;
2674 
2675         return (keyp->rls_locker == lsp->rls_locker &&
2676             keyp->rls_state == lsp->rls_state);
2677 }
2678 
2679 static void *
2680 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2681 {
2682         return (u_entry);
2683 }
2684 
2685 rfs4_lo_state_t *
2686 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2687     bool_t *create)
2688 {
2689         rfs4_lo_state_t *lsp;
2690         rfs4_lo_state_t arg;

2691 
2692         arg.rls_locker = lo;
2693         arg.rls_state = sp;
2694 
2695         lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_owner_idx, &arg,
2696             create, &arg, RFS4_DBS_VALID);
2697 
2698         return (lsp);
2699 }
2700 
2701 static stateid_t
2702 get_stateid(id_t eid)
2703 {
2704         stateid_t id;

2705 
2706         id.bits.boottime = rfs4_start_time;


2707         id.bits.ident = eid;
2708         id.bits.chgseq = 0;
2709         id.bits.type = 0;
2710         id.bits.pid = 0;
2711 
2712         /*
2713          * If we are booted as a cluster node, embed our nodeid.
2714          * We've already done sanity checks in rfs4_client_create() so no
2715          * need to repeat them here.
2716          */
2717         id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2718             clconf_get_nodeid() : 0;
2719 
2720         return (id);
2721 }
2722 
2723 /*
2724  * For use only when booted as a cluster node.
2725  * Returns TRUE if the embedded nodeid indicates that this stateid was
2726  * generated on another node.


2942 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
2943 {
2944         rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2945 
2946         /* return delegation if necessary */
2947         rfs4_return_deleg(dsp, FALSE);
2948 
2949         /* We're done with the file */
2950         rfs4_file_rele(dsp->rds_finfo);
2951         dsp->rds_finfo = NULL;
2952 
2953         /* And now with the client */
2954         rfs4_client_rele(dsp->rds_client);
2955         dsp->rds_client = NULL;
2956 }
2957 
2958 rfs4_deleg_state_t *
2959 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
2960 {
2961         rfs4_deleg_state_t ds, *dsp;

2962 
2963         ds.rds_client = sp->rs_owner->ro_client;
2964         ds.rds_finfo = sp->rs_finfo;
2965 
2966         dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_idx, &ds,
2967             create, &ds, RFS4_DBS_VALID);
2968 
2969         return (dsp);
2970 }
2971 
2972 rfs4_deleg_state_t *
2973 rfs4_finddelegstate(stateid_t *id)
2974 {
2975         rfs4_deleg_state_t *dsp;
2976         bool_t create = FALSE;

2977 
2978         dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_state_idx, id,
2979             &create, NULL, RFS4_DBS_VALID);
2980 
2981         return (dsp);
2982 }
2983 
2984 void
2985 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
2986 {
2987         rfs4_dbe_rele(dsp->rds_dbe);
2988 }
2989 
2990 void
2991 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
2992 {
2993 
2994         rfs4_dbe_lock(lsp->rls_dbe);
2995 
2996         /*
2997          * If we are skipping sequence id checking, this means that
2998          * this is the first lock request and therefore the sequence
2999          * id does not need to be updated.  This only happens on the


3078         if (sp->rs_closed == TRUE)
3079                 return (FALSE);
3080 
3081         return (fp == sp->rs_finfo);
3082 }
3083 
3084 static void *
3085 state_file_mkkey(rfs4_entry_t u_entry)
3086 {
3087         rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3088 
3089         return (sp->rs_finfo);
3090 }
3091 
3092 rfs4_state_t *
3093 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3094         bool_t *create)
3095 {
3096         rfs4_state_t *sp;
3097         rfs4_state_t key;

3098 
3099         key.rs_owner = oo;
3100         key.rs_finfo = fp;
3101 
3102         sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_owner_file_idx, &key,
3103             create, &key, RFS4_DBS_VALID);
3104 
3105         return (sp);
3106 }
3107 
3108 /* This returns ANY state struct that refers to this file */
3109 static rfs4_state_t *
3110 rfs4_findstate_by_file(rfs4_file_t *fp)
3111 {
3112         bool_t create = FALSE;

3113 
3114         return ((rfs4_state_t *)rfs4_dbsearch(rfs4_state_file_idx, fp,
3115             &create, fp, RFS4_DBS_VALID));
3116 }
3117 
3118 static bool_t
3119 rfs4_state_expiry(rfs4_entry_t u_entry)
3120 {
3121         rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3122 
3123         if (rfs4_dbe_is_invalid(sp->rs_dbe))
3124                 return (TRUE);
3125 
3126         if (sp->rs_closed == TRUE &&
3127             ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3128             > rfs4_lease_time))
3129                 return (TRUE);
3130 
3131         return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3132             > rfs4_lease_time));
3133 }
3134 


3145         sp->rs_stateid.bits.type = OPENID;
3146         sp->rs_owner = oo;
3147         sp->rs_finfo = fp;
3148 
3149         list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3150             offsetof(rfs4_lo_state_t, rls_node));
3151 
3152         /* Insert state on per open owner's list */
3153         rfs4_dbe_lock(oo->ro_dbe);
3154         list_insert_tail(&oo->ro_statelist, sp);
3155         rfs4_dbe_unlock(oo->ro_dbe);
3156 
3157         return (TRUE);
3158 }
3159 
3160 static rfs4_state_t *
3161 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3162 {
3163         rfs4_state_t *sp;
3164         bool_t create = FALSE;

3165 
3166         sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_idx, id,
3167             &create, NULL, find_invalid);
3168         if (lock_fp == TRUE && sp != NULL)
3169                 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3170 
3171         return (sp);
3172 }
3173 
3174 void
3175 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3176     cred_t *cr)
3177 {
3178         /* Remove the associated lo_state owners */
3179         if (!lock_held)
3180                 rfs4_dbe_lock(sp->rs_dbe);
3181 
3182         /*
3183          * If refcnt == 0, the dbe is about to be destroyed.
3184          * lock state will be released by the reaper thread.
3185          */
3186 


3214 }
3215 
3216 void
3217 rfs4_client_close(rfs4_client_t *cp)
3218 {
3219         /* Mark client as going away. */
3220         rfs4_dbe_lock(cp->rc_dbe);
3221         rfs4_dbe_invalidate(cp->rc_dbe);
3222         rfs4_dbe_unlock(cp->rc_dbe);
3223 
3224         rfs4_client_state_remove(cp);
3225 
3226         /* Release the client */
3227         rfs4_client_rele(cp);
3228 }
3229 
3230 nfsstat4
3231 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3232 {
3233         cid *cidp = (cid *) cp;

3234 


3235         /*
3236          * If we are booted as a cluster node, check the embedded nodeid.
3237          * If it indicates that this clientid was generated on another node,
3238          * inform the client accordingly.
3239          */
3240         if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3241                 return (NFS4ERR_STALE_CLIENTID);
3242 
3243         /*
3244          * If the server start time matches the time provided
3245          * by the client (via the clientid) and this is NOT a
3246          * setclientid_confirm then return EXPIRED.
3247          */
3248         if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time)

3249                 return (NFS4ERR_EXPIRED);
3250 
3251         return (NFS4ERR_STALE_CLIENTID);
3252 }
3253 
3254 /*
3255  * This is used when a stateid has not been found amongst the
3256  * current server's state.  Check the stateid to see if it
3257  * was from this server instantiation or not.
3258  */
3259 static nfsstat4
3260 what_stateid_error(stateid_t *id, stateid_type_t type)
3261 {




3262         /* If we are booted as a cluster node, was stateid locally generated? */
3263         if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3264                 return (NFS4ERR_STALE_STATEID);
3265 
3266         /* If types don't match then no use checking further */
3267         if (type != id->bits.type)
3268                 return (NFS4ERR_BAD_STATEID);
3269 
3270         /* From a different server instantiation, return STALE */
3271         if (id->bits.boottime != rfs4_start_time)
3272                 return (NFS4ERR_STALE_STATEID);
3273 
3274         /*
3275          * From this server but the state is most likely beyond lease
3276          * timeout: return NFS4ERR_EXPIRED.  However, there is the
3277          * case of a delegation stateid.  For delegations, there is a
3278          * case where the state can be removed without the client's
3279          * knowledge/consent: revocation.  In the case of delegation
3280          * revocation, the delegation state will be removed and will
3281          * not be found.  If the client does something like a
3282          * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3283          * that has been revoked, the server should return BAD_STATEID
3284          * instead of the more common EXPIRED error.
3285          */
3286         if (id->bits.boottime == rfs4_start_time) {
3287                 if (type == DELEGID)
3288                         return (NFS4ERR_BAD_STATEID);
3289                 else
3290                         return (NFS4ERR_EXPIRED);
3291         }
3292 
3293         return (NFS4ERR_BAD_STATEID);
3294 }
3295 
3296 /*
3297  * Used later on to find the various state structs.  When called from
3298  * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3299  * taken (it is not needed) and helps on the read/write path with
3300  * respect to performance.
3301  */
3302 static nfsstat4
3303 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3304     rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3305 {
3306         stateid_t *id = (stateid_t *)stateid;


3768 
3769 /*
3770  * This is a special function in that for the file struct provided the
3771  * server wants to remove/close all current state associated with the
3772  * file.  The prime use of this would be with OP_REMOVE to force the
3773  * release of state and particularly of file locks.
3774  *
3775  * There is an assumption that there is no delegations outstanding on
3776  * this file at this point.  The caller should have waited for those
3777  * to be returned or revoked.
3778  */
3779 void
3780 rfs4_close_all_state(rfs4_file_t *fp)
3781 {
3782         rfs4_state_t *sp;
3783 
3784         rfs4_dbe_lock(fp->rf_dbe);
3785 
3786 #ifdef DEBUG
3787         /* only applies when server is handing out delegations */
3788         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE)
3789                 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3790 #endif
3791 
3792         /* No delegations for this file */
3793         ASSERT(list_is_empty(&fp->rf_delegstatelist));
3794 
3795         /* Make sure that it can not be found */
3796         rfs4_dbe_invalidate(fp->rf_dbe);
3797 
3798         if (fp->rf_vp == NULL) {
3799                 rfs4_dbe_unlock(fp->rf_dbe);
3800                 return;
3801         }
3802         rfs4_dbe_unlock(fp->rf_dbe);
3803 
3804         /*
3805          * Hold as writer to prevent other server threads from
3806          * processing requests related to the file while all state is
3807          * being removed.
3808          */


3978                         }
3979                         mutex_enter(&vp->v_vsd_lock);
3980                         (void) vsd_set(vp, nfs4_srv_vkey, NULL);
3981                         mutex_exit(&vp->v_vsd_lock);
3982                         VN_RELE(vp);
3983                         fp->rf_vp = NULL;
3984                 }
3985                 rfs4_dbe_invalidate(fp->rf_dbe);
3986         }
3987 }
3988 
3989 /*
3990  * Given a directory that is being unexported, cleanup/release all
3991  * state in the server that refers to objects residing underneath this
3992  * particular export.  The ordering of the release is important.
3993  * Lock_owner, then state and then file.
3994  */
3995 void
3996 rfs4_clean_state_exi(struct exportinfo *exi)
3997 {
3998         mutex_enter(&rfs4_state_lock);
3999 
4000         if (rfs4_server_state == NULL) {
4001                 mutex_exit(&rfs4_state_lock);



4002                 return;
4003         }
4004 
4005         rfs4_dbe_walk(rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4006         rfs4_dbe_walk(rfs4_state_tab, rfs4_state_walk_callout, exi);
4007         rfs4_dbe_walk(rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4008         rfs4_dbe_walk(rfs4_file_tab, rfs4_file_walk_callout, exi);


4009 
4010         mutex_exit(&rfs4_state_lock);
4011 }
   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.

  24  */
  25 
  26 /*
  27  * Copyright 2018 Nexenta Systems, Inc.
  28  */
  29 
  30 #include <sys/systm.h>
  31 #include <sys/kmem.h>
  32 #include <sys/cmn_err.h>
  33 #include <sys/atomic.h>
  34 #include <sys/clconf.h>
  35 #include <sys/cladm.h>
  36 #include <sys/flock.h>
  37 #include <nfs/export.h>
  38 #include <nfs/nfs.h>
  39 #include <nfs/nfs4.h>
  40 #include <nfs/nfssys.h>
  41 #include <nfs/lm.h>
  42 #include <sys/pathname.h>
  43 #include <sys/sdt.h>
  44 #include <sys/nvpair.h>
  45 
  46 extern u_longlong_t nfs4_srv_caller_id;
  47 

  48 extern uint_t nfs4_srv_vkey;
  49 
  50 stateid4 special0 = {
  51         0,
  52         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
  53 };
  54 
  55 stateid4 special1 = {
  56         0xffffffff,
  57         {
  58                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  59                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  60                 (char)0xff, (char)0xff, (char)0xff, (char)0xff
  61         }
  62 };
  63 
  64 
  65 #define ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
  66                         stateid4_cmp(id, &special1))
  67 
  68 /* For embedding the cluster nodeid into our clientid */
  69 #define CLUSTER_NODEID_SHIFT    24
  70 #define CLUSTER_MAX_NODEID      255
  71 
  72 #ifdef DEBUG
  73 int rfs4_debug;
  74 #endif
  75 
  76 static uint32_t rfs4_database_debug = 0x00;
  77 
  78 /* CSTYLED */
  79 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
  80 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
  81 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
  82 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
  83 
  84 /*
  85  * Couple of simple init/destroy functions for a general waiter
  86  */
  87 void
  88 rfs4_sw_init(rfs4_state_wait_t *swp)
  89 {
  90         mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
  91         cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
  92         swp->sw_active = FALSE;
  93         swp->sw_wait_count = 0;
  94 }
  95 
  96 void
  97 rfs4_sw_destroy(rfs4_state_wait_t *swp)
  98 {
  99         mutex_destroy(swp->sw_cv_lock);


 108                 swp->sw_wait_count++;
 109                 cv_wait(swp->sw_cv, swp->sw_cv_lock);
 110                 swp->sw_wait_count--;
 111         }
 112         ASSERT(swp->sw_active == FALSE);
 113         swp->sw_active = TRUE;
 114         mutex_exit(swp->sw_cv_lock);
 115 }
 116 
 117 void
 118 rfs4_sw_exit(rfs4_state_wait_t *swp)
 119 {
 120         mutex_enter(swp->sw_cv_lock);
 121         ASSERT(swp->sw_active == TRUE);
 122         swp->sw_active = FALSE;
 123         if (swp->sw_wait_count != 0)
 124                 cv_broadcast(swp->sw_cv);
 125         mutex_exit(swp->sw_cv_lock);
 126 }
 127 





 128 static void
 129 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
 130 {
 131         lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
 132         lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
 133 
 134         if (sres->status == NFS4ERR_DENIED) {
 135                 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
 136                 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
 137         }
 138 }
 139 
 140 /*
 141  * CPR callback id -- not related to v4 callbacks
 142  */
 143 static callb_id_t cpr_id = 0;
 144 
 145 static void
 146 deep_lock_free(LOCK4res *res)
 147 {
 148         lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
 149 
 150         if (res->status == NFS4ERR_DENIED)
 151                 kmem_free(lo->owner_val, lo->owner_len);
 152 }
 153 
 154 static void
 155 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
 156 {
 157         nfsace4 *sacep, *dacep;
 158 
 159         if (sres->status != NFS4_OK) {
 160                 return;
 161         }
 162 
 163         dres->attrset = sres->attrset;
 164 


 260 /*
 261  * This code is somewhat prototypical for now. Its purpose currently is to
 262  * implement the interfaces sufficiently to finish the higher protocol
 263  * elements. This will be replaced by dynamically resizable tables
 264  * backed by kmem_cache allocator. However synchronization is handled
 265  * correctly (I hope) and will not change by much.  The mutexes for
 266  * the hash buckets that can be used to create new instances of data
 267  * structures  might be good candidates to evolve into reader writer
 268  * locks. If it has to do a creation, it would be holding the
 269  * mutex across a kmem_alloc with KM_SLEEP specified.
 270  */
 271 
 272 #ifdef DEBUG
 273 #define TABSIZE 17
 274 #else
 275 #define TABSIZE 2047
 276 #endif
 277 
 278 #define ADDRHASH(key) ((unsigned long)(key) >> 3)
 279 



































 280 #define MAXTABSZ 1024*1024
 281 
 282 /* The values below are rfs4_lease_time units */
 283 
 284 #ifdef DEBUG
 285 #define CLIENT_CACHE_TIME 1
 286 #define OPENOWNER_CACHE_TIME 1
 287 #define STATE_CACHE_TIME 1
 288 #define LO_STATE_CACHE_TIME 1
 289 #define LOCKOWNER_CACHE_TIME 1
 290 #define FILE_CACHE_TIME 3
 291 #define DELEG_STATE_CACHE_TIME 1
 292 #else
 293 #define CLIENT_CACHE_TIME 10
 294 #define OPENOWNER_CACHE_TIME 5
 295 #define STATE_CACHE_TIME 1
 296 #define LO_STATE_CACHE_TIME 1
 297 #define LOCKOWNER_CACHE_TIME 3
 298 #define FILE_CACHE_TIME 40
 299 #define DELEG_STATE_CACHE_TIME 1
 300 #endif
 301 
 302 /*
 303  * NFSv4 server state databases
 304  *
 305  * Initialized when the module is loaded and used by NFSv4 state tables.
 306  * These kmem_cache databases are global, the tables that make use of these
 307  * are per zone.
 308  */
 309 kmem_cache_t *rfs4_client_mem_cache;
 310 kmem_cache_t *rfs4_clntIP_mem_cache;
 311 kmem_cache_t *rfs4_openown_mem_cache;
 312 kmem_cache_t *rfs4_openstID_mem_cache;
 313 kmem_cache_t *rfs4_lockstID_mem_cache;
 314 kmem_cache_t *rfs4_lockown_mem_cache;
 315 kmem_cache_t *rfs4_file_mem_cache;
 316 kmem_cache_t *rfs4_delegstID_mem_cache;
 317 
 318 /*
 319  * NFSv4 state table functions
 320  */






 321 static bool_t rfs4_client_create(rfs4_entry_t, void *);
 322 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
 323 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
 324 static void rfs4_client_destroy(rfs4_entry_t);
 325 static bool_t rfs4_client_expiry(rfs4_entry_t);
 326 static uint32_t clientid_hash(void *);
 327 static bool_t clientid_compare(rfs4_entry_t, void *);
 328 static void *clientid_mkkey(rfs4_entry_t);
 329 static uint32_t nfsclnt_hash(void *);
 330 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
 331 static void *nfsclnt_mkkey(rfs4_entry_t);
 332 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
 333 static void rfs4_clntip_destroy(rfs4_entry_t);
 334 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
 335 static uint32_t clntip_hash(void *);
 336 static bool_t clntip_compare(rfs4_entry_t, void *);
 337 static void *clntip_mkkey(rfs4_entry_t);
 338 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
 339 static void rfs4_openowner_destroy(rfs4_entry_t);
 340 static bool_t rfs4_openowner_expiry(rfs4_entry_t);


 666                                         cl_ss->ss_pn = rfs4_ss_movestate(
 667                                             statedir, destdir, dep->d_name);
 668                                 } else {
 669                                         cl_ss->ss_pn = ss_pn;
 670                                 }
 671                                 insque(cl_ss, oldstate);
 672                         } else {
 673                                 rfs4_ss_pnfree(ss_pn);
 674                         }
 675                 }
 676         }
 677 
 678 out:
 679         (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
 680         VN_RELE(dvp);
 681         if (dirt)
 682                 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
 683 }
 684 
 685 static void
 686 rfs4_ss_init(nfs4_srv_t *nsrv4)
 687 {
 688         int npaths = 1;
 689         char *default_dss_path = NFS4_DSS_VAR_DIR;
 690 
 691         /* read the default stable storage state */
 692         rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
 693 
 694         rfs4_ss_enabled = 1;
 695 }
 696 
 697 static void
 698 rfs4_ss_fini(nfs4_srv_t *nsrv4)
 699 {
 700         rfs4_servinst_t *sip;
 701 
 702         mutex_enter(&nsrv4->servinst_lock);
 703         sip = nsrv4->nfs4_cur_servinst;
 704         while (sip != NULL) {
 705                 rfs4_dss_clear_oldstate(sip);
 706                 sip = sip->next;
 707         }
 708         mutex_exit(&nsrv4->servinst_lock);
 709 }
 710 
 711 /*
 712  * Remove all oldstate files referenced by this servinst.
 713  */
 714 static void
 715 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
 716 {
 717         rfs4_oldstate_t *os_head, *osp;
 718 
 719         rw_enter(&sip->oldstate_lock, RW_WRITER);
 720         os_head = sip->oldstate;
 721 
 722         if (os_head == NULL) {
 723                 rw_exit(&sip->oldstate_lock);
 724                 return;
 725         }
 726 
 727         /* skip dummy entry */
 728         osp = os_head->next;


 732 
 733                 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
 734 
 735                 if (osp->cl_id4.id_val)
 736                         kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
 737                 rfs4_ss_pnfree(osp->ss_pn);
 738 
 739                 os_next = osp->next;
 740                 remque(osp);
 741                 kmem_free(osp, sizeof (rfs4_oldstate_t));
 742                 osp = os_next;
 743         }
 744 
 745         rw_exit(&sip->oldstate_lock);
 746 }
 747 
 748 /*
 749  * Form the state and oldstate paths, and read in the stable storage files.
 750  */
 751 void
 752 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
 753 {
 754         int i;
 755         char *state, *oldstate;
 756 
 757         state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 758         oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 759 
 760         for (i = 0; i < npaths; i++) {
 761                 char *path = paths[i];
 762 
 763                 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
 764                 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
 765 
 766                 /*
 767                  * Populate the current server instance's oldstate list.
 768                  *
 769                  * 1. Read stable storage data from old state directory,
 770                  *    leaving its contents alone.
 771                  *
 772                  * 2. Read stable storage data from state directory,
 773                  *    and move the latter's contents to old state
 774                  *    directory.
 775                  */
 776                 /* CSTYLED */
 777                 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
 778                 /* CSTYLED */
 779                 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
 780         }
 781 
 782         kmem_free(state, MAXPATHLEN);
 783         kmem_free(oldstate, MAXPATHLEN);
 784 }
 785 
 786 
 787 /*
 788  * Check if we are still in grace and if the client can be
 789  * granted permission to perform reclaims.
 790  */
 791 void
 792 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
 793 {
 794         rfs4_servinst_t *sip;
 795 
 796         /*
 797          * It should be sufficient to check the oldstate data for just
 798          * this client's instance. However, since our per-instance
 799          * client grouping is solely temporal, HA-NFSv4 RG failover
 800          * might result in clients of the same RG being partitioned into
 801          * separate instances.
 802          *
 803          * Until the client grouping is improved, we must check the
 804          * oldstate data for all instances with an active grace period.
 805          *
 806          * This also serves as the mechanism to remove stale oldstate data.
 807          * The first time we check an instance after its grace period has
 808          * expired, the oldstate data should be cleared.
 809          *
 810          * Start at the current instance, and walk the list backwards
 811          * to the first.
 812          */
 813         mutex_enter(&nsrv4->servinst_lock);
 814         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 815                 rfs4_ss_chkclid_sip(cp, sip);
 816 
 817                 /* if the above check found this client, we're done */
 818                 if (cp->rc_can_reclaim)
 819                         break;
 820         }
 821         mutex_exit(&nsrv4->servinst_lock);
 822 }
 823 
 824 static void
 825 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
 826 {
 827         rfs4_oldstate_t *osp, *os_head;
 828 
 829         /* short circuit everything if this server instance has no oldstate */
 830         rw_enter(&sip->oldstate_lock, RW_READER);
 831         os_head = sip->oldstate;
 832         rw_exit(&sip->oldstate_lock);
 833         if (os_head == NULL)
 834                 return;
 835 
 836         /*
 837          * If this server instance is no longer in a grace period then
 838          * the client won't be able to reclaim. No further need for this
 839          * instance's oldstate data, so it can be cleared.
 840          */
 841         if (!rfs4_servinst_in_grace(sip))


 851         while (osp != os_head) {
 852                 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
 853                         if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
 854                             osp->cl_id4.id_len) == 0) {
 855                                 cp->rc_can_reclaim = 1;
 856                                 break;
 857                         }
 858                 }
 859                 osp = osp->next;
 860         }
 861 
 862         rw_exit(&sip->oldstate_lock);
 863 }
 864 
 865 /*
 866  * Place client information into stable storage: 1/3.
 867  * First, generate the leaf filename, from the client's IP address and
 868  * the server-generated short-hand clientid.
 869  */
 870 void
 871 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
 872 {
 873         const char *kinet_ntop6(uchar_t *, char *, size_t);
 874         char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
 875         struct sockaddr *ca;
 876         uchar_t *b;
 877 
 878         if (rfs4_ss_enabled == 0) {
 879                 return;
 880         }
 881 
 882         buf[0] = 0;
 883 
 884         ca = (struct sockaddr *)&cp->rc_addr;
 885 
 886         /*
 887          * Convert the caller's IP address to a dotted string
 888          */
 889         if (ca->sa_family == AF_INET) {
 890                 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
 891                 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
 892                     b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
 893         } else if (ca->sa_family == AF_INET6) {
 894                 struct sockaddr_in6 *sin6;
 895 
 896                 sin6 = (struct sockaddr_in6 *)ca;
 897                 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
 898                     buf, INET6_ADDRSTRLEN);
 899         }
 900 
 901         (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
 902             (longlong_t)cp->rc_clientid);
 903         rfs4_ss_clid_write(nsrv4, cp, leaf);
 904 }
 905 
 906 /*
 907  * Place client information into stable storage: 2/3.
 908  * DSS: distributed stable storage: the file may need to be written to
 909  * multiple directories.
 910  */
 911 static void
 912 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
 913 {
 914         rfs4_servinst_t *sip;
 915 
 916         /*
 917          * It should be sufficient to write the leaf file to (all) DSS paths
 918          * associated with just this client's instance. However, since our
 919          * per-instance client grouping is solely temporal, HA-NFSv4 RG
 920          * failover might result in us losing DSS data.
 921          *
 922          * Until the client grouping is improved, we must write the DSS data
 923          * to all instances' paths. Start at the current instance, and
 924          * walk the list backwards to the first.
 925          */
 926         mutex_enter(&nsrv4->servinst_lock);
 927         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 928                 int i, npaths = sip->dss_npaths;
 929 
 930                 /* write the leaf file to all DSS paths */
 931                 for (i = 0; i < npaths; i++) {
 932                         rfs4_dss_path_t *dss_path = sip->dss_paths[i];
 933 
 934                         /* HA-NFSv4 path might have been failed-away from us */
 935                         if (dss_path == NULL)
 936                                 continue;
 937 
 938                         rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
 939                 }
 940         }
 941         mutex_exit(&nsrv4->servinst_lock);
 942 }
 943 
 944 /*
 945  * Place client information into stable storage: 3/3.
 946  * Write the stable storage data to the requested file.
 947  */
 948 static void
 949 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
 950 {
 951         int ioflag;
 952         int file_vers = NFS4_SS_VERSION;
 953         size_t dirlen;
 954         struct uio uio;
 955         struct iovec iov[4];
 956         char *dir;
 957         rfs4_ss_pn_t *ss_pn;
 958         vnode_t *vp;
 959         nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
 960 
 961         /* allow 2 extra bytes for '/' & NUL */


1114                  * for forced expiration
1115                  */
1116                 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1117                         cp->rc_forced_expire = 1;
1118                 }
1119                 break;
1120 
1121         default:
1122                 /* force this assert to fail */
1123                 ASSERT(clr->addr_type != clr->addr_type);
1124         }
1125 }
1126 
1127 /*
1128  * This is called from nfssys() in order to clear server state
1129  * for the specified client IP Address.
1130  */
1131 void
1132 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1133 {
1134         nfs4_srv_t *nsrv4;
1135         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1136         (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1137 }
1138 
1139 /*
1140  * Used to initialize the NFSv4 server's state or database.  All of
1141  * the tables are created and timers are set.

1142  */
1143 void
1144 rfs4_state_g_init()
1145 {

1146         extern boolean_t rfs4_cpr_callb(void *, int);
1147         /*
1148          * Add a CPR callback so that we can update client
1149          * access times to extend the lease after a suspend
1150          * and resume (using the same class as rpcmod/connmgr)
1151          */
1152         cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1153 
1154         /*
1155          * NFSv4 server state databases
1156          *
1157          * Initilized when the module is loaded and used by NFSv4 state tables.
1158          * These kmem_cache free pools are used globally, the NFSv4 state
1159          * tables which make use of these kmem_cache free pools are per zone.
1160          *
1161          * initialize the global kmem_cache free pools which will be used by
1162          * the NFSv4 state tables.
1163          */
1164         /* CSTYLED */
1165         rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
1166         /* CSTYLED */
1167         rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
1168         /* CSTYLED */
1169         rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
1170         /* CSTYLED */
1171         rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
1172         /* CSTYLED */
1173         rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
1174         /* CSTYLED */
1175         rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
1176         /* CSTYLED */
1177         rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
1178         /* CSTYLED */
1179         rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1180 
1181         rfs4_client_clrst = rfs4_clear_client_state;
1182 }
1183 
1184 
1185 /*
1186  * Used at server shutdown to cleanup all of the NFSv4 server's structures
1187  * and other state.
1188  */
1189 void
1190 rfs4_state_g_fini()
1191 {
1192         int i;
1193         /*
1194          * Cleanup the CPR callback.

1195          */
1196         if (cpr_id)
1197                 (void) callb_delete(cpr_id);
1198 
1199         rfs4_client_clrst = NULL;
1200 
1201         /* free the NFSv4 state databases */
1202         for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1203                 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1204                 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1205         }
1206 
1207         rfs4_client_mem_cache = NULL;
1208         rfs4_clntIP_mem_cache = NULL;
1209         rfs4_openown_mem_cache = NULL;
1210         rfs4_openstID_mem_cache = NULL;
1211         rfs4_lockstID_mem_cache = NULL;
1212         rfs4_lockown_mem_cache = NULL;
1213         rfs4_file_mem_cache = NULL;
1214         rfs4_delegstID_mem_cache = NULL;
1215 
1216         /* DSS: distributed stable storage */
1217         nvlist_free(rfs4_dss_oldpaths);
1218         nvlist_free(rfs4_dss_paths);
1219         rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1220 }
1221 
1222 /*
1223  * Used to initialize the per zone NFSv4 server's state
1224  */
1225 void
1226 rfs4_state_zone_init(nfs4_srv_t *nsrv4)
1227 {
1228         time_t start_time;
1229         int start_grace;
1230         char *dss_path = NFS4_DSS_VAR_DIR;
1231 
1232         /* DSS: distributed stable storage: initialise served paths list */
1233         nsrv4->dss_pathlist = NULL;
1234 
1235         /*
1236          * Set the boot time.  If the server
1237          * has been restarted quickly and has had the opportunity to
1238          * service clients, then the start_time needs to be bumped
1239          * regardless.  A small window but it exists...
1240          */
1241         start_time = gethrestime_sec();
1242         if (nsrv4->rfs4_start_time < start_time)
1243                 nsrv4->rfs4_start_time = start_time;
1244         else
1245                 nsrv4->rfs4_start_time++;
1246 



1247         /*
1248          * Create the first server instance, or a new one if the server has
1249          * been restarted; see above comments on rfs4_start_time. Don't
1250          * start its grace period; that will be done later, to maximise the
1251          * clients' recovery window.
1252          */
1253         start_grace = 0;
1254         rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
1255 
1256         /* reset the "first NFSv4 request" status */
1257         nsrv4->seen_first_compound = 0;
1258 
1259         mutex_enter(&nsrv4->state_lock);
1260 
1261         /*
1262          * If the server state database has already been initialized,
1263          * skip it

1264          */
1265         if (nsrv4->nfs4_server_state != NULL) {
1266                 mutex_exit(&nsrv4->state_lock);
1267                 return;
1268         }
1269 
1270         rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
1271 
1272         /* set the various cache timers for table creation */
1273         if (nsrv4->rfs4_client_cache_time == 0)
1274                 nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
1275         if (nsrv4->rfs4_openowner_cache_time == 0)
1276                 nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1277         if (nsrv4->rfs4_state_cache_time == 0)
1278                 nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
1279         if (nsrv4->rfs4_lo_state_cache_time == 0)
1280                 nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1281         if (nsrv4->rfs4_lockowner_cache_time == 0)
1282                 nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1283         if (nsrv4->rfs4_file_cache_time == 0)
1284                 nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
1285         if (nsrv4->rfs4_deleg_state_cache_time == 0)
1286                 nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1287 
1288         /* Create the overall database to hold all server state */
1289         nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1290 
1291         /* Now create the individual tables */
1292         nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
1293         nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1294             "Client",
1295             nsrv4->rfs4_client_cache_time,
1296             2,
1297             rfs4_client_create,
1298             rfs4_client_destroy,
1299             rfs4_client_expiry,
1300             sizeof (rfs4_client_t),
1301             TABSIZE,
1302             MAXTABSZ/8, 100);
1303         nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1304             "nfs_client_id4", nfsclnt_hash,
1305             nfsclnt_compare, nfsclnt_mkkey,
1306             TRUE);
1307         nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1308             "client_id", clientid_hash,
1309             clientid_compare, clientid_mkkey,
1310             FALSE);
1311 
1312         nsrv4->rfs4_clntip_cache_time = 86400 * 365; /* about a year */
1313         nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1314             "ClntIP",
1315             nsrv4->rfs4_clntip_cache_time,
1316             1,
1317             rfs4_clntip_create,
1318             rfs4_clntip_destroy,
1319             rfs4_clntip_expiry,
1320             sizeof (rfs4_clntip_t),
1321             TABSIZE,
1322             MAXTABSZ, 100);
1323         nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1324             "client_ip", clntip_hash,
1325             clntip_compare, clntip_mkkey,
1326             TRUE);
1327 
1328         nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
1329         nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1330             "OpenOwner",
1331             nsrv4->rfs4_openowner_cache_time,
1332             1,
1333             rfs4_openowner_create,
1334             rfs4_openowner_destroy,
1335             rfs4_openowner_expiry,
1336             sizeof (rfs4_openowner_t),
1337             TABSIZE,
1338             MAXTABSZ, 100);
1339         nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1340             "open_owner4", openowner_hash,
1341             openowner_compare,
1342             openowner_mkkey, TRUE);
1343 
1344         nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
1345         nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1346             "OpenStateID",
1347             nsrv4->rfs4_state_cache_time,
1348             3,
1349             rfs4_state_create,
1350             rfs4_state_destroy,
1351             rfs4_state_expiry,
1352             sizeof (rfs4_state_t),
1353             TABSIZE,
1354             MAXTABSZ, 100);
1355 
1356         /* CSTYLED */
1357         nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1358             "Openowner-File",
1359             state_owner_file_hash,
1360             state_owner_file_compare,
1361             state_owner_file_mkkey, TRUE);
1362 
1363         nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1364             "State-id", state_hash,
1365             state_compare, state_mkkey, FALSE);
1366 
1367         nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1368             "File", state_file_hash,
1369             state_file_compare, state_file_mkkey,
1370             FALSE);
1371 
1372         nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
1373         nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1374             "LockStateID",
1375             nsrv4->rfs4_lo_state_cache_time,
1376             2,
1377             rfs4_lo_state_create,
1378             rfs4_lo_state_destroy,
1379             rfs4_lo_state_expiry,
1380             sizeof (rfs4_lo_state_t),
1381             TABSIZE,
1382             MAXTABSZ, 100);
1383 
1384         /* CSTYLED */
1385         nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1386             "lockownerxstate",
1387             lo_state_lo_hash,
1388             lo_state_lo_compare,
1389             lo_state_lo_mkkey, TRUE);
1390 
1391         nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1392             "State-id",
1393             lo_state_hash, lo_state_compare,
1394             lo_state_mkkey, FALSE);
1395 
1396         nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1397 
1398         nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1399             "Lockowner",
1400             nsrv4->rfs4_lockowner_cache_time,
1401             2,
1402             rfs4_lockowner_create,
1403             rfs4_lockowner_destroy,
1404             rfs4_lockowner_expiry,
1405             sizeof (rfs4_lockowner_t),
1406             TABSIZE,
1407             MAXTABSZ, 100);
1408 
1409         nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1410             "lock_owner4", lockowner_hash,
1411             lockowner_compare,
1412             lockowner_mkkey, TRUE);
1413 
1414         /* CSTYLED */
1415         nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1416             "pid", pid_hash,
1417             pid_compare, pid_mkkey,
1418             FALSE);
1419 
1420         nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
1421         nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1422             "File",
1423             nsrv4->rfs4_file_cache_time,
1424             1,
1425             rfs4_file_create,
1426             rfs4_file_destroy,
1427             NULL,
1428             sizeof (rfs4_file_t),
1429             TABSIZE,
1430             MAXTABSZ, -1);
1431 
1432         nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1433             "Filehandle", file_hash,
1434             file_compare, file_mkkey, TRUE);
1435 
1436         nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
1437         /* CSTYLED */
1438         nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1439             "DelegStateID",
1440             nsrv4->rfs4_deleg_state_cache_time,
1441             2,
1442             rfs4_deleg_state_create,
1443             rfs4_deleg_state_destroy,
1444             rfs4_deleg_state_expiry,
1445             sizeof (rfs4_deleg_state_t),
1446             TABSIZE,
1447             MAXTABSZ, 100);
1448         nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1449             "DelegByFileClient",
1450             deleg_hash,
1451             deleg_compare,
1452             deleg_mkkey, TRUE);
1453 
1454         /* CSTYLED */
1455         nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1456             "DelegState",
1457             deleg_state_hash,
1458             deleg_state_compare,
1459             deleg_state_mkkey, FALSE);
1460 
1461         mutex_exit(&nsrv4->state_lock);
1462 
1463         /*
1464          * Init the stable storage.
1465          */
1466         rfs4_ss_init(nsrv4);




1467 }
1468 

1469 /*
1470  * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1471  * and state.
      *
      * Operates on the nfs4_srv_t registered for curzone under rfs4_zone_key.
      * The delegation policy is first set to SRV_NEVER_DELEGATE.  Then, with
      * state_lock held, the server instances are destroyed, the state
      * database pointer is detached from the zone structure, the database is
      * shut down and the per-table cache timers are zeroed.  If no state
      * database is attached the function returns right after taking (and
      * dropping) state_lock.  Stable storage is cleaned up last, after
      * state_lock has been released.
1472  */
1473 void
1474 rfs4_state_zone_fini()
1475 {
1476         rfs4_database_t *dbp;
1477         nfs4_srv_t *nsrv4;
1478         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1479 
1480         rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1481 
1482         mutex_enter(&nsrv4->state_lock);
1483 
             /* Nothing to tear down when no state database is attached */
1484         if (nsrv4->nfs4_server_state == NULL) {
1485                 mutex_exit(&nsrv4->state_lock);
1486                 return;
1487         }
1488 
1489         /* destroy server instances and current instance ptr */
1490         rfs4_servinst_destroy_all(nsrv4);
1491 
1492         /* reset the "first NFSv4 request" status */
1493         nsrv4->seen_first_compound = 0;

1494 
             /* Detach the database from the zone structure before shutdown */
1495         dbp = nsrv4->nfs4_server_state;
1496         nsrv4->nfs4_server_state = NULL;



1497 
1498         rw_destroy(&nsrv4->rfs4_findclient_lock);
1499 
1500         /* First stop all of the reaper threads in the database */
1501         rfs4_database_shutdown(dbp);
1502         /*
1503          * XXX workaround
1504          * Skip destroying the state database yet just in case there
1505          * are unfinished operations depending on it.
              *
              * NOTE(review): with the destroy call below commented out, dbp
              * is not released in this function -- confirm whether it is
              * freed elsewhere or intentionally leaked.
1506          */
1507         /* Now destroy/release the database tables */
1508         /* rfs4_database_destroy(dbp); */
1509 
1510         /* Reset the cache timers for next time */
1511         nsrv4->rfs4_client_cache_time = 0;
1512         nsrv4->rfs4_openowner_cache_time = 0;
1513         nsrv4->rfs4_state_cache_time = 0;
1514         nsrv4->rfs4_lo_state_cache_time = 0;
1515         nsrv4->rfs4_lockowner_cache_time = 0;
1516         nsrv4->rfs4_file_cache_time = 0;
1517         nsrv4->rfs4_deleg_state_cache_time = 0;
1518 
1519         mutex_exit(&nsrv4->state_lock);
1520 
1521         /* clean up any dangling stable storage structures */
1522         rfs4_ss_fini(nsrv4);








1523 }
1524 
1525 typedef union {
             /*
              * Overlay used to build and inspect a clientid4 through its
              * implementation fields.  rfs4_client_create() fills start_time
              * from the zone's rfs4_start_time and c_id from the database
              * entry id.
              */
1526         struct {
1527                 uint32_t start_time;
1528                 uint32_t c_id;
1529         } impl_id;
             /* The same storage viewed as the protocol clientid4 */
1530         clientid4 id4;
1531 } cid;
1532 
1533 static int foreign_stateid(stateid_t *id);
1534 static int foreign_clientid(cid *cidp);
1535 static void embed_nodeid(cid *cidp);
1536 
1537 typedef union {
1538         struct {
1539                 uint32_t c_id;
1540                 uint32_t gen_num;
1541         } cv_impl;
1542         verifier4       confirm_verf;


1617          * If the sysadmin has used clear_locks for this
1618          * entry then forced_expire will be set and we
1619          * want this entry to be reaped. Or the entry
1620          * has exceeded its lease period.
1621          */
1622         cp_expired = (cp->rc_forced_expire ||
1623             (gethrestime_sec() - cp->rc_last_access
1624             > rfs4_lease_time));
1625 
1626         if (!cp->rc_ss_remove && cp_expired)
1627                 cp->rc_ss_remove = 1;
1628         return (cp_expired);
1629 }
1630 
1631 /*
1632  * Remove the leaf file from all distributed stable storage paths.
      *
      * The leaf name is read from cp->rc_ss_pn->leaf, so cp->rc_ss_pn must be
      * non-NULL on entry.  The server-instance list of the current zone is
      * walked from nfs4_cur_servinst backwards through ->prev with
      * servinst_lock held, and rfs4_dss_remove_leaf() is called for each
      * instance with the NFS4_DSS_STATE_LEAF directory.
1633  */
1634 static void
1635 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1636 {
1637         nfs4_srv_t *nsrv4;
1638         rfs4_servinst_t *sip;
1639         char *leaf = cp->rc_ss_pn->leaf;
1640 
1641         /*
1642          * since the state files are written to all DSS
1643          * paths we must remove this leaf file instance
1644          * from all server instances.
1645          */
1646 
1647         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1648         mutex_enter(&nsrv4->servinst_lock);
1649         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1650                 /* remove the leaf file associated with this server instance */
1651                 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1652         }
1653         mutex_exit(&nsrv4->servinst_lock);
1654 }
1655 
1656 static void
1657 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1658 {
1659         int i, npaths = sip->dss_npaths;
1660 
1661         for (i = 0; i < npaths; i++) {
1662                 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1663                 char *path, *dir;
1664                 size_t pathlen;
1665 
1666                 /* the HA-NFSv4 path might have been failed-over away from us */
1667                 if (dss_path == NULL)
1668                         continue;
1669 
1670                 dir = dss_path->path;
1671 
1672                 /* allow 3 extra bytes for two '/' & a NUL */
1673                 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;


1701                 if (cp->rc_ss_remove)
1702                         rfs4_dss_remove_cpleaf(cp);
1703                 rfs4_ss_pnfree(cp->rc_ss_pn);
1704         }
1705 
1706         /* Free the client supplied client id */
1707         kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1708 
1709         if (cp->rc_sysidt != LM_NOSYSID)
1710                 lm_free_sysidt(cp->rc_sysidt);
1711 }
1712 
1713 static bool_t
1714 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1715 {
1716         rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1717         nfs_client_id4 *client = (nfs_client_id4 *)arg;
1718         struct sockaddr *ca;
1719         cid *cidp;
1720         scid_confirm_verf *scvp;
1721         nfs4_srv_t *nsrv4;
1722 
1723         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1724 
1725         /* Get a clientid to give to the client */
1726         cidp = (cid *)&cp->rc_clientid;
1727         cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1728         cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1729 
1730         /* If we are booted as a cluster node, embed our nodeid */
1731         if (cluster_bootflags & CLUSTER_BOOTED)
1732                 embed_nodeid(cidp);
1733 
1734         /* Allocate and copy client's client id value */
1735         cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1736         cp->rc_nfs_client.id_len = client->id_len;
1737         bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1738         cp->rc_nfs_client.verifier = client->verifier;
1739 
1740         /* Copy client's IP address */
1741         ca = client->cl_addr;
1742         if (ca->sa_family == AF_INET)
1743                 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1744         else if (ca->sa_family == AF_INET6)
1745                 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1746         cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1747 


1765 
1766         cp->rc_cr_set = NULL;
1767 
1768         cp->rc_sysidt = LM_NOSYSID;
1769 
1770         list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1771             offsetof(rfs4_openowner_t, ro_node));
1772 
1773         /* set up the callback control structure */
1774         cp->rc_cbinfo.cb_state = CB_UNINIT;
1775         mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1776         cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1777         cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1778 
1779         /*
1780          * Associate the client_t with the current server instance.
1781          * The hold is solely to satisfy the calling requirement of
1782          * rfs4_servinst_assign(). In this case it's not strictly necessary.
1783          */
1784         rfs4_dbe_hold(cp->rc_dbe);
1785         rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1786         rfs4_dbe_rele(cp->rc_dbe);
1787 
1788         return (TRUE);
1789 }
1790 
1791 /*
1792  * Caller wants to generate/update the setclientid_confirm verifier
1793  * associated with a client.  This is done during the SETCLIENTID
1794  * processing.
      *
      * The update consists of incrementing the gen_num field of
      * cp->rc_confirm_verf, accessed through the scid_confirm_verf overlay.
      * No lock is taken here.
      * NOTE(review): presumably the caller serializes access to cp -- confirm.
1795  */
1796 void
1797 rfs4_client_scv_next(rfs4_client_t *cp)
1798 {
1799         scid_confirm_verf *scvp;
1800 
1801         /* Init the value for the SETCLIENTID_CONFIRM verifier */
1802         scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1803         scvp->cv_impl.gen_num++;
1804 }
1805 
1806 void
1807 rfs4_client_rele(rfs4_client_t *cp)
1808 {
             /* Release one reference on the client's database entry */
1809         rfs4_dbe_rele(cp->rc_dbe);
1810 }
1811 
1812 rfs4_client_t *
1813 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1814 {
             /*
              * Search the current zone's rfs4_nfsclnt_idx index for the given
              * nfs_client_id4.  "client" is used both as the search key and
              * as the creation argument; "create" is handed through to
              * rfs4_dbsearch() unchanged.  Returns whatever rfs4_dbsearch()
              * returns.
              *
              * When oldcp is non-NULL its database entry is hidden for the
              * duration of the search and rfs4_findclient_lock is taken as
              * writer instead of reader.
              * NOTE(review): presumably hiding keeps the search from matching
              * oldcp itself -- confirm against rfs4_dbe_hide().
              */
1815         rfs4_client_t *cp;
1816         nfs4_srv_t *nsrv4;
1817         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1818 
1819 
1820         if (oldcp) {
1821                 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1822                 rfs4_dbe_hide(oldcp->rc_dbe);
1823         } else {
1824                 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1825         }
1826 
1827         cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1828             create, (void *)client, RFS4_DBS_VALID);
1829 
             /* Undo the hide before the lock is dropped */
1830         if (oldcp)
1831                 rfs4_dbe_unhide(oldcp->rc_dbe);
1832 
1833         rw_exit(&nsrv4->rfs4_findclient_lock);
1834 
1835         return (cp);
1836 }
1837 
1838 rfs4_client_t *
1839 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1840 {
             /*
              * Look up an existing client by clientid4 in the current zone's
              * rfs4_clientid_idx index; an entry is never created (create is
              * FALSE).  Returns NULL when no entry is found, when the
              * clientid is judged foreign on a cluster node, or when the
              * entry still needs confirmation and find_unconfirmed is FALSE
              * (in which case the reference obtained by the search is
              * released first).  Otherwise the found client is returned.
              */
1841         rfs4_client_t *cp;
1842         bool_t create = FALSE;
1843         cid *cidp = (cid *)&clientid;
1844         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1845 
1846         /* If we're a cluster and the nodeid isn't right, short-circuit */
1847         if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1848                 return (NULL);
1849 
1850         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1851 
1852         cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1853             &create, NULL, RFS4_DBS_VALID);
1854 
1855         rw_exit(&nsrv4->rfs4_findclient_lock);
1856 
1857         if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1858                 rfs4_client_rele(cp);
1859                 return (NULL);
1860         } else {
1861                 return (cp);
1862         }
1863 }
1864 
1865 static uint32_t
1866 clntip_hash(void *key)
1867 {
1868         struct sockaddr *addr = key;
1869         int i, len = 0;
1870         uint32_t hash = 0;
1871         char *ptr;
1872 
1873         if (addr->sa_family == AF_INET) {
1874                 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1875                 len = sizeof (struct in_addr);


1943 {
1944         rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1945         struct sockaddr *ca = (struct sockaddr *)arg;
1946 
1947         /* Copy client's IP address */
1948         if (ca->sa_family == AF_INET)
1949                 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1950         else if (ca->sa_family == AF_INET6)
1951                 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1952         else
1953                 return (FALSE);
1954         cp->ri_no_referrals = 1;
1955 
1956         return (TRUE);
1957 }
1958 
1959 rfs4_clntip_t *
1960 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1961 {
             /*
              * Search the current zone's rfs4_clntip_idx index for addr while
              * holding rfs4_findclient_lock as reader.  addr serves as both
              * the search key and the creation argument; "create" is passed
              * through to rfs4_dbsearch().  Returns the search result.
              */
1962         rfs4_clntip_t *cp;
1963         nfs4_srv_t *nsrv4;
1964 
1965         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1966 
1967         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1968 
1969         cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1970             create, addr, RFS4_DBS_VALID);
1971 
1972         rw_exit(&nsrv4->rfs4_findclient_lock);
1973 
1974         return (cp);
1975 }
1976 
1977 void
1978 rfs4_invalidate_clntip(struct sockaddr *addr)
1979 {
             /*
              * Find the existing client-IP entry for addr (never creating
              * one) and invalidate it.  Does nothing if no entry is found.
              * rfs4_findclient_lock is held as reader across the search,
              * the invalidation and the release of the search reference.
              */
1980         rfs4_clntip_t *cp;
1981         bool_t create = FALSE;
1982         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1983 
1984         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1985 
1986         cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1987             &create, NULL, RFS4_DBS_VALID);
1988         if (cp == NULL) {
1989                 rw_exit(&nsrv4->rfs4_findclient_lock);
1990                 return;
1991         }
1992         rfs4_dbe_invalidate(cp->ri_dbe);
1993         rfs4_dbe_rele(cp->ri_dbe);
1994 
1995         rw_exit(&nsrv4->rfs4_findclient_lock);
1996 }
1997 
1998 bool_t
1999 rfs4_lease_expired(rfs4_client_t *cp)
2000 {
2001         bool_t rc;
2002 
2003         rfs4_dbe_lock(cp->rc_dbe);
2004 
2005         /*
2006          * If the admin has executed clear_locks for this
2007          * client id, force expire will be set, so no need
2008          * to calculate anything because it's "outa here".
2009          */
2010         if (cp->rc_forced_expire) {
2011                 rc = TRUE;
2012         } else {
2013                 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2014         }
2015 


2123 
2124         /* Free the open owner id */
2125         kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2126 }
2127 
2128 void
2129 rfs4_openowner_rele(rfs4_openowner_t *oo)
2130 {
             /* Release one reference on the open owner's database entry */
2131         rfs4_dbe_rele(oo->ro_dbe);
2132 }
2133 
2134 static bool_t
2135 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2136 {
2137         rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2138         rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2139         open_owner4 *openowner = &argp->ro_owner;
2140         seqid4 seqid = argp->ro_open_seqid;
2141         rfs4_client_t *cp;
2142         bool_t create = FALSE;
2143         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2144 
2145         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2146 
2147         cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2148             &openowner->clientid,
2149             &create, NULL, RFS4_DBS_VALID);
2150 
2151         rw_exit(&nsrv4->rfs4_findclient_lock);
2152 
2153         if (cp == NULL)
2154                 return (FALSE);
2155 
2156         oo->ro_reply_fh.nfs_fh4_len = 0;
2157         oo->ro_reply_fh.nfs_fh4_val = NULL;
2158 
2159         oo->ro_owner.clientid = openowner->clientid;
2160         oo->ro_owner.owner_val =
2161             kmem_alloc(openowner->owner_len, KM_SLEEP);
2162 
2163         bcopy(openowner->owner_val,
2164             oo->ro_owner.owner_val, openowner->owner_len);
2165 
2166         oo->ro_owner.owner_len = openowner->owner_len;
2167 
2168         oo->ro_need_confirm = TRUE;
2169 
2170         rfs4_sw_init(&oo->ro_sw);
2171 


2173         bzero(&oo->ro_reply, sizeof (nfs_resop4));
2174         oo->ro_client = cp;
2175         oo->ro_cr_set = NULL;
2176 
2177         list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2178             offsetof(rfs4_state_t, rs_node));
2179 
2180         /* Insert openowner into client's open owner list */
2181         rfs4_dbe_lock(cp->rc_dbe);
2182         list_insert_tail(&cp->rc_openownerlist, oo);
2183         rfs4_dbe_unlock(cp->rc_dbe);
2184 
2185         return (TRUE);
2186 }
2187 
2188 rfs4_openowner_t *
2189 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2190 {
             /*
              * Search the current zone's rfs4_openowner_idx index, keyed by
              * the open_owner4.  The owner and seqid are packed into a
              * stack rfs4_openowner_t that is passed as the creation
              * argument; only its ro_owner and ro_open_seqid fields are
              * initialized.  Returns the result of rfs4_dbsearch().
              */
2191         rfs4_openowner_t *oo;
2192         rfs4_openowner_t arg;
2193         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2194 
2195         arg.ro_owner = *openowner;
2196         arg.ro_open_seqid = seqid;
2197         /* CSTYLED */
2198         oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2199             create, &arg, RFS4_DBS_VALID);
2200 
2201         return (oo);
2202 }
2203 
2204 void
2205 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2206 {
             /*
              * Increment the open owner's ro_open_seqid while holding the
              * lock of its database entry.
              */
2207 
2208         rfs4_dbe_lock(oo->ro_dbe);
2209 
2210         oo->ro_open_seqid++;
2211 
2212         rfs4_dbe_unlock(oo->ro_dbe);
2213 }
2214 
2215 void
2216 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2217 {
2218 


2321 }
2322 
2323 /* ARGSUSED */
2324 static bool_t
2325 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2326 {
2327         /*
              * Expiry callback passed to rfs4_table_create() for the
              * "Lockowner" table.  The entry argument is not examined.
              *
2328          * Since expiry is called with no other references on
2329          * this struct, go ahead and have it removed.
2330          */
2331         return (TRUE);
2332 }
2333 
2334 static bool_t
2335 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2336 {
             /*
              * Creation callback passed to rfs4_table_create() for the
              * "Lockowner" table.  u_entry is the new rfs4_lockowner_t and
              * arg is the lock_owner4 supplied to rfs4_dbsearch().
              *
              * The owning client is looked up by the clientid in the
              * lock_owner4 (never created); if it is not found FALSE is
              * returned and the entry is left untouched.  Otherwise the
              * client pointer, a private copy of the owner bytes and a pid
              * derived from the database entry id are stored, and TRUE is
              * returned.
              */
2337         rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2338         lock_owner4 *lockowner = (lock_owner4 *)arg;
2339         rfs4_client_t *cp;
2340         bool_t create = FALSE;
2341         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2342 
2343         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2344 
2345         cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2346             &lockowner->clientid,
2347             &create, NULL, RFS4_DBS_VALID);
2348 
2349         rw_exit(&nsrv4->rfs4_findclient_lock);
2350 
2351         if (cp == NULL)
2352                 return (FALSE);
2353 
             /*
              * The client found above is not released here; it stays
              * attached through rl_client.
              */
2354         /* Reference client */
2355         lo->rl_client = cp;
2356         lo->rl_owner.clientid = lockowner->clientid;
2357         lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2358         bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2359             lockowner->owner_len);
2360         lo->rl_owner.owner_len = lockowner->owner_len;
2361         lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2362 
2363         return (TRUE);
2364 }
2365 
2366 rfs4_lockowner_t *
2367 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2368 {
             /*
              * Search the current zone's rfs4_lockowner_idx index for the
              * given lock_owner4, which is used as both the key and the
              * creation argument.  Returns the result of rfs4_dbsearch().
              */
2369         rfs4_lockowner_t *lo;
2370         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2371 
2372         /* CSTYLED */
2373         lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2374             create, lockowner, RFS4_DBS_VALID);
2375 
2376         return (lo);
2377 }
2378 
2379 rfs4_lockowner_t *
2380 rfs4_findlockowner_by_pid(pid_t pid)
2381 {
             /*
              * Look up an existing lock owner through the current zone's
              * rfs4_lockowner_pid_idx index; no entry is ever created.
              * The pid value itself, cast to a pointer, is the search key.
              */
2382         rfs4_lockowner_t *lo;
2383         bool_t create = FALSE;
2384         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2385 
2386         lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2387             (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2388 
2389         return (lo);
2390 }
2391 
2392 
2393 static uint32_t
2394 file_hash(void *key)
2395 {
             /*
              * Hash callback of the "Filehandle" index: the key pointer
              * itself is hashed with ADDRHASH().
              */
2396         return (ADDRHASH(key));
2397 }
2398 
2399 static void *
2400 file_mkkey(rfs4_entry_t u_entry)
2401 {
             /*
              * Key-extraction callback of the "Filehandle" index: the key
              * of a file entry is its rf_vp pointer.
              */
2402         rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2403 
2404         return (fp->rf_vp);
2405 }
2406 


2477 
2478         mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2479         cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2480 
2481         fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2482 
2483         rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2484 
2485         mutex_enter(&vp->v_vsd_lock);
2486         VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2487         mutex_exit(&vp->v_vsd_lock);
2488 
2489         return (TRUE);
2490 }
2491 
2492 rfs4_file_t *
2493 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2494 {
             /*
              * Find the rfs4_file_t associated with vp.
              *
              * When *create is TRUE the current zone's rfs4_file_idx index is
              * searched with vp as key and {vp, fh} as creation argument.
              * Otherwise the file is fetched from the vnode's VSD slot
              * (nfs4_srv_vkey) under v_vsd_lock; it is returned with an
              * extra hold only if its database entry is neither invalid nor
              * at a zero reference count, and NULL is returned in those
              * cases or when no VSD value is set.
              */
2495         rfs4_file_t *fp;
2496         rfs4_fcreate_arg arg;
2497         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2498 
2499         arg.vp = vp;
2500         arg.fh = fh;
2501 
2502         if (*create == TRUE)
2503                 /* CSTYLED */
2504                 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2505                     &arg, RFS4_DBS_VALID);
2506         else {
2507                 mutex_enter(&vp->v_vsd_lock);
2508                 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2509                 if (fp) {
                             /* Check state and take the hold under the dbe lock */
2510                         rfs4_dbe_lock(fp->rf_dbe);
2511                         if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2512                             (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2513                                 rfs4_dbe_unlock(fp->rf_dbe);
2514                                 fp = NULL;
2515                         } else {
2516                                 rfs4_dbe_hold(fp->rf_dbe);
2517                                 rfs4_dbe_unlock(fp->rf_dbe);
2518                         }
2519                 }
2520                 mutex_exit(&vp->v_vsd_lock);
2521         }
2522         return (fp);
2523 }
2524 
2525 /*
2526  * Find a file in the db and once it is located, take the rw lock.
2527  * Need to check the vnode pointer and if it does not exist (it was
2528  * removed between the db location and check) redo the find.  This
2529  * assumes that a file struct that has a NULL vnode pointer is marked
2530  * as 'invalid' and will not be found in the db the second time
2531  * around.
      *
      * On success the file is returned with rf_file_rwlock held as writer.
      * When *create is FALSE on entry the file is fetched from the vnode's
      * VSD slot instead of the db, and NULL is returned (no retry) if the
      * entry is unusable or its rf_vp is found to be NULL.  When *create is
      * TRUE on entry the db search is repeated, with *create restored to its
      * entry value each time, until a file with a non-NULL rf_vp is obtained
      * or the search returns NULL.
2532  */
2533 rfs4_file_t *
2534 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2535 {
2536         rfs4_file_t *fp;
2537         rfs4_fcreate_arg arg;
2538         bool_t screate = *create;
2539         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2540 
2541         if (screate == FALSE) {
2542                 mutex_enter(&vp->v_vsd_lock);
2543                 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2544                 if (fp) {
2545                         rfs4_dbe_lock(fp->rf_dbe);
2546                         if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2547                             (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2548                                 rfs4_dbe_unlock(fp->rf_dbe);
2549                                 mutex_exit(&vp->v_vsd_lock);
2550                                 fp = NULL;
2551                         } else {
                                     /*
                                      * Hold the entry, then drop v_vsd_lock
                                      * before taking the file rwlock.
                                      */
2552                                 rfs4_dbe_hold(fp->rf_dbe);
2553                                 rfs4_dbe_unlock(fp->rf_dbe);
2554                                 mutex_exit(&vp->v_vsd_lock);
2555                                 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2556                                 if (fp->rf_vp == NULL) {
2557                                         rw_exit(&fp->rf_file_rwlock);
2558                                         rfs4_file_rele(fp);
2559                                         fp = NULL;
2560                                 }
2561                         }
2562                 } else {
2563                         mutex_exit(&vp->v_vsd_lock);
2564                 }
2565         } else {
2566 retry:
2567                 arg.vp = vp;
2568                 arg.fh = fh;
2569 
2570                 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2571                     create, &arg, RFS4_DBS_VALID);
2572                 if (fp != NULL) {
2573                         rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2574                         if (fp->rf_vp == NULL) {
                                     /* Stale entry: drop it and search again */
2575                                 rw_exit(&fp->rf_file_rwlock);
2576                                 rfs4_file_rele(fp);
2577                                 *create = screate;
2578                                 goto retry;
2579                         }
2580                 }
2581         }
2582 
2583         return (fp);
2584 }
2585 
2586 static uint32_t
2587 lo_state_hash(void *key)
2588 {
2589         stateid_t *id = key;
2590 
2591         return (id->bits.ident+id->bits.pid);


2706         list_insert_tail(&sp->rs_lostatelist, lsp);
2707         rfs4_dbe_hold(sp->rs_dbe);
2708         rfs4_dbe_unlock(sp->rs_dbe);
2709 
2710         return (TRUE);
2711 }
2712 
2713 void
2714 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2715 {
             /*
              * Release one reference on the lock state's database entry.
              * When unlock_fp is TRUE, the rf_file_rwlock of the file
              * reached through lsp->rls_state->rs_finfo is dropped first.
              */
2716         if (unlock_fp == TRUE)
2717                 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2718         rfs4_dbe_rele(lsp->rls_dbe);
2719 }
2720 
2721 static rfs4_lo_state_t *
2722 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2723 {
             /*
              * Look up an existing lock state by stateid in the current
              * zone's rfs4_lo_state_idx index; no entry is ever created.
              * When lock_fp is TRUE and an entry is found, the
              * rf_file_rwlock of the associated file is taken as reader
              * before returning.  Returns the entry or NULL.
              */
2724         rfs4_lo_state_t *lsp;
2725         bool_t create = FALSE;
2726         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2727 
2728         lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2729             &create, NULL, RFS4_DBS_VALID);
2730         if (lock_fp == TRUE && lsp != NULL)
2731                 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2732 
2733         return (lsp);
2734 }
2735 
2736 
2737 static uint32_t
2738 lo_state_lo_hash(void *key)
2739 {
             /*
              * Hash callback of the "lockownerxstate" index.  The key is an
              * rfs4_lo_state_t; the hash combines the addresses of its lock
              * owner and its open state.
              */
2740         rfs4_lo_state_t *lsp = key;
2741 
2742         return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2743 }
2744 
2745 static bool_t
2746 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2747 {
             /*
              * Compare callback of the "lockownerxstate" index: an entry
              * matches when both its rls_locker and rls_state pointers are
              * equal to those in the key.
              */
2748         rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2749         rfs4_lo_state_t *keyp = key;
2750 
2751         return (keyp->rls_locker == lsp->rls_locker &&
2752             keyp->rls_state == lsp->rls_state);
2753 }
2754 
2755 static void *
2756 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2757 {
             /*
              * Key-extraction callback of the "lockownerxstate" index: the
              * entry itself is the key.
              */
2758         return (u_entry);
2759 }
2760 
2761 rfs4_lo_state_t *
2762 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2763     bool_t *create)
2764 {
             /*
              * Search the current zone's rfs4_lo_state_owner_idx index for
              * the lock state belonging to the (lock owner, open state)
              * pair.  A stack rfs4_lo_state_t with only rls_locker and
              * rls_state filled in is used as both the key and the creation
              * argument.  Returns the result of rfs4_dbsearch().
              */
2765         rfs4_lo_state_t *lsp;
2766         rfs4_lo_state_t arg;
2767         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2768 
2769         arg.rls_locker = lo;
2770         arg.rls_state = sp;
2771 
2772         lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2773             &arg, create, &arg, RFS4_DBS_VALID);
2774 
2775         return (lsp);
2776 }
2777 
2778 static stateid_t
2779 get_stateid(id_t eid)
2780 {
             /*
              * Build a fresh stateid for the entry id eid.  boottime comes
              * from the current zone's rfs4_start_time, ident is eid, and
              * chgseq, type and pid start at zero.  clnodeid is the cluster
              * node id when booted as a cluster node, otherwise zero.  The
              * stateid is returned by value.
              */
2781         stateid_t id;
2782         nfs4_srv_t *nsrv4;
2783 
2784         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2785 
2786         id.bits.boottime = nsrv4->rfs4_start_time;
2787         id.bits.ident = eid;
2788         id.bits.chgseq = 0;
2789         id.bits.type = 0;
2790         id.bits.pid = 0;
2791 
2792         /*
2793          * If we are booted as a cluster node, embed our nodeid.
2794          * We've already done sanity checks in rfs4_client_create() so no
2795          * need to repeat them here.
2796          */
2797         id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2798             clconf_get_nodeid() : 0;
2799 
2800         return (id);
2801 }
2802 
2803 /*
2804  * For use only when booted as a cluster node.
2805  * Returns TRUE if the embedded nodeid indicates that this stateid was
2806  * generated on another node.


3022 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3023 {
3024         rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3025 
3026         /* return delegation if necessary */
3027         rfs4_return_deleg(dsp, FALSE);
3028 
3029         /* We're done with the file */
3030         rfs4_file_rele(dsp->rds_finfo);
3031         dsp->rds_finfo = NULL;
3032 
3033         /* And now with the client */
3034         rfs4_client_rele(dsp->rds_client);
3035         dsp->rds_client = NULL;
3036 }
3037 
3038 rfs4_deleg_state_t *
3039 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3040 {
             /*
              * Search the current zone's rfs4_deleg_idx index for the
              * delegation state matching the client and file of the open
              * state sp.  A stack rfs4_deleg_state_t with only rds_client
              * (from sp->rs_owner->ro_client) and rds_finfo (from
              * sp->rs_finfo) filled in is used as both the key and the
              * creation argument.  Returns the result of rfs4_dbsearch().
              */
3041         rfs4_deleg_state_t ds, *dsp;
3042         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3043 
3044         ds.rds_client = sp->rs_owner->ro_client;
3045         ds.rds_finfo = sp->rs_finfo;
3046 
3047         dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3048             create, &ds, RFS4_DBS_VALID);
3049 
3050         return (dsp);
3051 }
3052 
3053 rfs4_deleg_state_t *
3054 rfs4_finddelegstate(stateid_t *id)
3055 {
             /*
              * Look up an existing delegation state by stateid in the
              * current zone's rfs4_deleg_state_idx index; no entry is ever
              * created.  Returns the entry or NULL.
              */
3056         rfs4_deleg_state_t *dsp;
3057         bool_t create = FALSE;
3058         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3059 
3060         dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3061             id, &create, NULL, RFS4_DBS_VALID);
3062 
3063         return (dsp);
3064 }
3065 
/*
 * Release a delegation state entry by calling rfs4_dbe_rele() on its
 * database entry (dsp->rds_dbe).
 */
3066 void
3067 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3068 {
3069         rfs4_dbe_rele(dsp->rds_dbe);
3070 }
3071 
3072 void
3073 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3074 {
3075 
3076         rfs4_dbe_lock(lsp->rls_dbe);
3077 
3078         /*
3079          * If we are skipping sequence id checking, this means that
3080          * this is the first lock request and therefore the sequence
3081          * id does not need to be updated.  This only happens on the


3160         if (sp->rs_closed == TRUE)
3161                 return (FALSE);
3162 
3163         return (fp == sp->rs_finfo);
3164 }
3165 
/*
 * Key-extraction routine for state entries indexed by file: treats
 * 'u_entry' as an rfs4_state_t and returns its rs_finfo pointer as the key.
 */
3166 static void *
3167 state_file_mkkey(rfs4_entry_t u_entry)
3168 {
3169         rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3170 
3171         return (sp->rs_finfo);
3172 }
3173 
/*
 * Look up a state entry for the (open owner, file) pair in the
 * rfs4_state_owner_file_idx index of the nfs4_srv_t obtained for curzone.
 *
 * 'oo' and 'fp' are placed in an on-stack rfs4_state_t that serves as
 * both the search key and the extra argument to rfs4_dbsearch().
 * 'create' is handed through to rfs4_dbsearch() unchanged.
 *
 * Returns the result of rfs4_dbsearch() cast to rfs4_state_t *.
 */
3174 rfs4_state_t *
3175 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3176     bool_t *create)
3177 {
3178         rfs4_state_t *sp;
3179         rfs4_state_t key;
3180         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3181 
             /* Only rs_owner and rs_finfo of the on-stack key are filled in. */
3182         key.rs_owner = oo;
3183         key.rs_finfo = fp;
3184 
3185         sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3186             &key, create, &key, RFS4_DBS_VALID);
3187 
3188         return (sp);
3189 }
3190 
3191 /* This returns ANY state struct that refers to this file */
/*
 * The lookup goes through the rfs4_state_file_idx index of the nfs4_srv_t
 * obtained for curzone, with 'fp' used as both the key and the extra
 * argument.  A local 'create' set to FALSE is passed, so this call never
 * requests creation of a new entry.
 */
3192 static rfs4_state_t *
3193 rfs4_findstate_by_file(rfs4_file_t *fp)
3194 {
3195         bool_t create = FALSE;
3196         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3197 
3198         return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3199             &create, fp, RFS4_DBS_VALID));
3200 }
3201 
/*
 * Expiry predicate for a state entry.  Returns TRUE when any of the
 * following holds, checked in this order:
 *   - the entry's dbe is already marked invalid;
 *   - the state is closed (rs_closed) and more than rfs4_lease_time
 *     seconds have passed since rfs4_dbe_get_timerele() for the entry;
 *   - more than rfs4_lease_time seconds have passed since the owning
 *     client's rc_last_access.
 * Otherwise returns FALSE.
 */
3202 static bool_t
3203 rfs4_state_expiry(rfs4_entry_t u_entry)
3204 {
3205         rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3206 
3207         if (rfs4_dbe_is_invalid(sp->rs_dbe))
3208                 return (TRUE);
3209 
             /* Closed state: age is measured from the dbe's timerele value. */
3210         if (sp->rs_closed == TRUE &&
3211             ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3212             > rfs4_lease_time))
3213                 return (TRUE);
3214 
             /* Otherwise age is measured from the client's last access. */
3215         return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3216             > rfs4_lease_time));
3217 }
3218 


3229         sp->rs_stateid.bits.type = OPENID;
3230         sp->rs_owner = oo;
3231         sp->rs_finfo = fp;
3232 
3233         list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3234             offsetof(rfs4_lo_state_t, rls_node));
3235 
3236         /* Insert state on per open owner's list */
3237         rfs4_dbe_lock(oo->ro_dbe);
3238         list_insert_tail(&oo->ro_statelist, sp);
3239         rfs4_dbe_unlock(oo->ro_dbe);
3240 
3241         return (TRUE);
3242 }
3243 
/*
 * Look up a state entry by stateid in the rfs4_state_idx index of the
 * nfs4_srv_t obtained for curzone.
 *
 * 'find_invalid' is passed to rfs4_dbsearch() as the search type.  The
 * lookup never asks for creation (local 'create' is FALSE).
 *
 * When 'lock_fp' is TRUE and an entry is found, the rf_file_rwlock of the
 * entry's file is entered as RW_READER before returning.  It is not
 * released here.
 * NOTE(review): presumably the caller is responsible for the matching
 * rw_exit() -- confirm at the call sites.
 *
 * Returns the entry found, or NULL.
 */
3244 static rfs4_state_t *
3245 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3246 {
3247         rfs4_state_t *sp;
3248         bool_t create = FALSE;
3249         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3250 
3251         sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3252             &create, NULL, find_invalid);
3253         if (lock_fp == TRUE && sp != NULL)
3254                 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3255 
3256         return (sp);
3257 }
3258 
3259 void
3260 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3261     cred_t *cr)
3262 {
3263         /* Remove the associated lo_state owners */
3264         if (!lock_held)
3265                 rfs4_dbe_lock(sp->rs_dbe);
3266 
3267         /*
3268          * If refcnt == 0, the dbe is about to be destroyed.
3269          * lock state will be released by the reaper thread.
3270          */
3271 


3299 }
3300 
/*
 * Close out a client: invalidate its database entry while holding the
 * entry lock, call rfs4_client_state_remove() on it, and finally drop a
 * reference with rfs4_client_rele().
 * NOTE(review): the final rele presumably consumes a hold owned by the
 * caller -- confirm at the call sites.
 */
3301 void
3302 rfs4_client_close(rfs4_client_t *cp)
3303 {
3304         /* Mark client as going away. */
3305         rfs4_dbe_lock(cp->rc_dbe);
3306         rfs4_dbe_invalidate(cp->rc_dbe);
3307         rfs4_dbe_unlock(cp->rc_dbe);
3308 
             /* Done after the dbe lock has been dropped. */
3309         rfs4_client_state_remove(cp);
3310 
3311         /* Release the client */
3312         rfs4_client_rele(cp);
3313 }
3314 
/*
 * Decide which error to report for the clientid 'cp'.  This routine only
 * ever returns an error status:
 *   - NFS4ERR_STALE_CLIENTID if we are booted as a cluster node and
 *     foreign_clientid() reports the clientid came from another node;
 *   - NFS4ERR_EXPIRED if 'setclid_confirm' is zero and the start time
 *     embedded in the clientid equals this server's rfs4_start_time;
 *   - NFS4ERR_STALE_CLIENTID in every other case.
 */
3315 nfsstat4
3316 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3317 {
             /* View the opaque clientid4 through the server's cid layout. */
3318         cid *cidp = (cid *) cp;
3319         nfs4_srv_t *nsrv4;
3320 
3321         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3322 
3323         /*
3324          * If we are booted as a cluster node, check the embedded nodeid.
3325          * If it indicates that this clientid was generated on another node,
3326          * inform the client accordingly.
              *
              * Note: '&' binds tighter than '&&', so the test below is
              * (cluster_bootflags & CLUSTER_BOOTED) && foreign_clientid(cidp).
3327          */
3328         if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3329                 return (NFS4ERR_STALE_CLIENTID);
3330 
3331         /*
3332          * If the server start time matches the time provided
3333          * by the client (via the clientid) and this is NOT a
3334          * setclientid_confirm then return EXPIRED.
3335          */
3336         if (!setclid_confirm &&
3337             cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3338                 return (NFS4ERR_EXPIRED);
3339 
3340         return (NFS4ERR_STALE_CLIENTID);
3341 }
3342 
3343 /*
3344  * This is used when a stateid has not been found amongst the
3345  * current server's state.  Check the stateid to see if it
3346  * was from this server instantiation or not.
      *
      * 'id' is the stateid that was not found; 'type' is the stateid type
      * the caller was looking for and is compared against id->bits.type.
      *
      * Returns, checked in this order:
      *   NFS4ERR_STALE_STATEID  booted as a cluster node and
      *                          foreign_stateid(id) is true;
      *   NFS4ERR_BAD_STATEID    'type' differs from id->bits.type;
      *   NFS4ERR_STALE_STATEID  id->bits.boottime differs from this
      *                          server's rfs4_start_time;
      *   NFS4ERR_BAD_STATEID    boottime matches and type is DELEGID;
      *   NFS4ERR_EXPIRED        boottime matches and type is anything else.
3347  */
3348 static nfsstat4
3349 what_stateid_error(stateid_t *id, stateid_type_t type)
3350 {
3351         nfs4_srv_t *nsrv4;
3352 
3353         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3354 
3355         /* If we are booted as a cluster node, was stateid locally generated? */
3356         if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3357                 return (NFS4ERR_STALE_STATEID);
3358 
3359         /* If types don't match then no use checking further */
3360         if (type != id->bits.type)
3361                 return (NFS4ERR_BAD_STATEID);
3362 
3363         /* From a different server instantiation, return STALE */
3364         if (id->bits.boottime != nsrv4->rfs4_start_time)
3365                 return (NFS4ERR_STALE_STATEID);
3366 
3367         /*
3368          * From this server but the state is most likely beyond lease
3369          * timeout: return NFS4ERR_EXPIRED.  However, there is the
3370          * case of a delegation stateid.  For delegations, there is a
3371          * case where the state can be removed without the client's
3372          * knowledge/consent: revocation.  In the case of delegation
3373          * revocation, the delegation state will be removed and will
3374          * not be found.  If the client does something like a
3375          * DELEGRETURN or even a READ/WRITE with a delegation stateid
3376          * that has been revoked, the server should return BAD_STATEID
3377          * instead of the more common EXPIRED error.
              *
              * NOTE(review): the '!=' test above has already returned, so
              * this '==' test is always true here (unless rfs4_start_time
              * changes between the two reads) and the final return below
              * looks unreachable.
3378          */
3379         if (id->bits.boottime == nsrv4->rfs4_start_time) {
3380                 if (type == DELEGID)
3381                         return (NFS4ERR_BAD_STATEID);
3382                 else
3383                         return (NFS4ERR_EXPIRED);
3384         }
3385 
3386         return (NFS4ERR_BAD_STATEID);
3387 }
3388 
3389 /*
3390  * Used later on to find the various state structs.  When called from
3391  * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3392  * taken (it is not needed), which helps performance on the
3393  * read/write path.
3394  */
3395 static nfsstat4
3396 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3397     rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3398 {
3399         stateid_t *id = (stateid_t *)stateid;


3861 
3862 /*
3863  * This is a special function in that for the file struct provided the
3864  * server wants to remove/close all current state associated with the
3865  * file.  The prime use of this would be with OP_REMOVE to force the
3866  * release of state and particularly of file locks.
3867  *
3868  * The assumption is that there are no delegations outstanding on
3869  * this file at this point.  The caller should have waited for those
3870  * to be returned or revoked.
3871  */
3872 void
3873 rfs4_close_all_state(rfs4_file_t *fp)
3874 {
3875         rfs4_state_t *sp;
3876 
3877         rfs4_dbe_lock(fp->rf_dbe);
3878 
3879 #ifdef DEBUG
3880         /* only applies when server is handing out delegations */
3881         if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3882                 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3883 #endif
3884 
3885         /* No delegations for this file */
3886         ASSERT(list_is_empty(&fp->rf_delegstatelist));
3887 
3888         /* Make sure that it can not be found */
3889         rfs4_dbe_invalidate(fp->rf_dbe);
3890 
3891         if (fp->rf_vp == NULL) {
3892                 rfs4_dbe_unlock(fp->rf_dbe);
3893                 return;
3894         }
3895         rfs4_dbe_unlock(fp->rf_dbe);
3896 
3897         /*
3898          * Hold as writer to prevent other server threads from
3899          * processing requests related to the file while all state is
3900          * being removed.
3901          */


4071                         }
4072                         mutex_enter(&vp->v_vsd_lock);
4073                         (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4074                         mutex_exit(&vp->v_vsd_lock);
4075                         VN_RELE(vp);
4076                         fp->rf_vp = NULL;
4077                 }
4078                 rfs4_dbe_invalidate(fp->rf_dbe);
4079         }
4080 }
4081 
4082 /*
4083  * Given a directory that is being unexported, cleanup/release all
4084  * state in the server that refers to objects residing underneath this
4085  * particular export.  The ordering of the release is important:
4086  * lock_owner state, then open state, then delegation state, then file.
      *
      * The walks run with nsrv4->state_lock held.  If nfs4_server_state is
      * NULL the function returns without walking any table.  'exi' is
      * passed to every walk callout as its argument.
4087  */
4088 void
4089 rfs4_clean_state_exi(struct exportinfo *exi)
4090 {
4091         nfs4_srv_t *nsrv4;
4092 
4093         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
4094         mutex_enter(&nsrv4->state_lock);
4095 
             /* Nothing to clean when nfs4_server_state is not set. */
4096         if (nsrv4->nfs4_server_state == NULL) {
4097                 mutex_exit(&nsrv4->state_lock);
4098                 return;
4099         }
4100 
4101         /* CSTYLED */
4102         rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4103         rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4104         /* CSTYLED */
4105         rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4106         rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4107 
4108         mutex_exit(&nsrv4->state_lock);
4109 }