Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */

  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 





  26 #include <sys/systm.h>
  27 #include <sys/kmem.h>
  28 #include <sys/cmn_err.h>
  29 #include <sys/atomic.h>
  30 #include <sys/clconf.h>
  31 #include <sys/cladm.h>
  32 #include <sys/flock.h>
  33 #include <nfs/export.h>
  34 #include <nfs/nfs.h>
  35 #include <nfs/nfs4.h>
  36 #include <nfs/nfssys.h>
  37 #include <nfs/lm.h>
  38 #include <sys/pathname.h>
  39 #include <sys/sdt.h>
  40 #include <sys/nvpair.h>
  41 
  42 extern u_longlong_t nfs4_srv_caller_id;
  43 
  44 extern time_t rfs4_start_time;
  45 extern uint_t nfs4_srv_vkey;
  46 
  47 stateid4 special0 = {
  48         0,
  49         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
  50 };
  51 
  52 stateid4 special1 = {
  53         0xffffffff,
  54         {
  55                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  56                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  57                 (char)0xff, (char)0xff, (char)0xff, (char)0xff
  58         }
  59 };
  60 
  61 
  62 #define ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
  63                         stateid4_cmp(id, &special1))
  64 
  65 /* For embedding the cluster nodeid into our clientid */
  66 #define CLUSTER_NODEID_SHIFT    24
  67 #define CLUSTER_MAX_NODEID      255
  68 
  69 #ifdef DEBUG
  70 int rfs4_debug;
  71 #endif
  72 
  73 static uint32_t rfs4_database_debug = 0x00;
  74 
  75 static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf);

  76 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
  77 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
  78 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
  79 
  80 /*
  81  * Couple of simple init/destroy functions for a general waiter
  82  */
  83 void
  84 rfs4_sw_init(rfs4_state_wait_t *swp)
  85 {
  86         mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
  87         cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
  88         swp->sw_active = FALSE;
  89         swp->sw_wait_count = 0;
  90 }
  91 
  92 void
  93 rfs4_sw_destroy(rfs4_state_wait_t *swp)
  94 {
  95         mutex_destroy(swp->sw_cv_lock);


 104                 swp->sw_wait_count++;
 105                 cv_wait(swp->sw_cv, swp->sw_cv_lock);
 106                 swp->sw_wait_count--;
 107         }
 108         ASSERT(swp->sw_active == FALSE);
 109         swp->sw_active = TRUE;
 110         mutex_exit(swp->sw_cv_lock);
 111 }
 112 
 113 void
 114 rfs4_sw_exit(rfs4_state_wait_t *swp)
 115 {
 116         mutex_enter(swp->sw_cv_lock);
 117         ASSERT(swp->sw_active == TRUE);
 118         swp->sw_active = FALSE;
 119         if (swp->sw_wait_count != 0)
 120                 cv_broadcast(swp->sw_cv);
 121         mutex_exit(swp->sw_cv_lock);
 122 }
 123 
 124 /*
 125  * CPR callback id -- not related to v4 callbacks
 126  */
 127 static callb_id_t cpr_id = 0;
 128 
 129 static void
 130 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
 131 {
 132         lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
 133         lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
 134 
 135         if (sres->status == NFS4ERR_DENIED) {
 136                 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
 137                 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
 138         }
 139 }
 140 





 141 static void
 142 deep_lock_free(LOCK4res *res)
 143 {
 144         lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
 145 
 146         if (res->status == NFS4ERR_DENIED)
 147                 kmem_free(lo->owner_val, lo->owner_len);
 148 }
 149 
 150 static void
 151 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
 152 {
 153         nfsace4 *sacep, *dacep;
 154 
 155         if (sres->status != NFS4_OK) {
 156                 return;
 157         }
 158 
 159         dres->attrset = sres->attrset;
 160 


 256 /*
 257  * This code is somewhat prototypical for now. Its purpose currently is to
 258  * implement the interfaces sufficiently to finish the higher protocol
 259  * elements. This will be replaced by dynamically resizable tables
 260  * backed by kmem_cache allocator. However synchronization is handled
 261  * correctly (I hope) and will not change by much.  The mutexes for
 262  * the hash buckets that can be used to create new instances of data
 263  * structures  might be good candidates to evolve into reader writer
 264  * locks. If it has to do a creation, it would be holding the
 265  * mutex across a kmem_alloc with KM_SLEEP specified.
 266  */
 267 
 268 #ifdef DEBUG
 269 #define TABSIZE 17
 270 #else
 271 #define TABSIZE 2047
 272 #endif
 273 
 274 #define ADDRHASH(key) ((unsigned long)(key) >> 3)
 275 
 276 /* Used to serialize create/destroy of rfs4_server_state database */
 277 kmutex_t        rfs4_state_lock;
 278 static rfs4_database_t *rfs4_server_state = NULL;
 279 
 280 /* Used to serialize lookups of clientids */
 281 static  krwlock_t       rfs4_findclient_lock;
 282 
 283 /*
 284  * For now this "table" is exposed so that the CPR callback
 285  * function can tromp through it..
 286  */
 287 rfs4_table_t *rfs4_client_tab;
 288 
 289 static rfs4_index_t *rfs4_clientid_idx;
 290 static rfs4_index_t *rfs4_nfsclnt_idx;
 291 static rfs4_table_t *rfs4_clntip_tab;
 292 static rfs4_index_t *rfs4_clntip_idx;
 293 static rfs4_table_t *rfs4_openowner_tab;
 294 static rfs4_index_t *rfs4_openowner_idx;
 295 static rfs4_table_t *rfs4_state_tab;
 296 static rfs4_index_t *rfs4_state_idx;
 297 static rfs4_index_t *rfs4_state_owner_file_idx;
 298 static rfs4_index_t *rfs4_state_file_idx;
 299 static rfs4_table_t *rfs4_lo_state_tab;
 300 static rfs4_index_t *rfs4_lo_state_idx;
 301 static rfs4_index_t *rfs4_lo_state_owner_idx;
 302 static rfs4_table_t *rfs4_lockowner_tab;
 303 static rfs4_index_t *rfs4_lockowner_idx;
 304 static rfs4_index_t *rfs4_lockowner_pid_idx;
 305 static rfs4_table_t *rfs4_file_tab;
 306 static rfs4_index_t *rfs4_file_idx;
 307 static rfs4_table_t *rfs4_deleg_state_tab;
 308 static rfs4_index_t *rfs4_deleg_idx;
 309 static rfs4_index_t *rfs4_deleg_state_idx;
 310 
 311 #define MAXTABSZ 1024*1024
 312 
 313 /* The values below are rfs4_lease_time units */
 314 
 315 #ifdef DEBUG
 316 #define CLIENT_CACHE_TIME 1
 317 #define OPENOWNER_CACHE_TIME 1
 318 #define STATE_CACHE_TIME 1
 319 #define LO_STATE_CACHE_TIME 1
 320 #define LOCKOWNER_CACHE_TIME 1
 321 #define FILE_CACHE_TIME 3
 322 #define DELEG_STATE_CACHE_TIME 1
 323 #else
 324 #define CLIENT_CACHE_TIME 10
 325 #define OPENOWNER_CACHE_TIME 5
 326 #define STATE_CACHE_TIME 1
 327 #define LO_STATE_CACHE_TIME 1
 328 #define LOCKOWNER_CACHE_TIME 3
 329 #define FILE_CACHE_TIME 40
 330 #define DELEG_STATE_CACHE_TIME 1
 331 #endif
 332 















 333 
 334 static time_t rfs4_client_cache_time = 0;
 335 static time_t rfs4_clntip_cache_time = 0;
 336 static time_t rfs4_openowner_cache_time = 0;
 337 static time_t rfs4_state_cache_time = 0;
 338 static time_t rfs4_lo_state_cache_time = 0;
 339 static time_t rfs4_lockowner_cache_time = 0;
 340 static time_t rfs4_file_cache_time = 0;
 341 static time_t rfs4_deleg_state_cache_time = 0;
 342 
 343 static bool_t rfs4_client_create(rfs4_entry_t, void *);
 344 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
 345 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
 346 static void rfs4_client_destroy(rfs4_entry_t);
 347 static bool_t rfs4_client_expiry(rfs4_entry_t);
 348 static uint32_t clientid_hash(void *);
 349 static bool_t clientid_compare(rfs4_entry_t, void *);
 350 static void *clientid_mkkey(rfs4_entry_t);
 351 static uint32_t nfsclnt_hash(void *);
 352 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
 353 static void *nfsclnt_mkkey(rfs4_entry_t);
 354 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
 355 static void rfs4_clntip_destroy(rfs4_entry_t);
 356 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
 357 static uint32_t clntip_hash(void *);
 358 static bool_t clntip_compare(rfs4_entry_t, void *);
 359 static void *clntip_mkkey(rfs4_entry_t);
 360 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
 361 static void rfs4_openowner_destroy(rfs4_entry_t);
 362 static bool_t rfs4_openowner_expiry(rfs4_entry_t);


 688                                         cl_ss->ss_pn = rfs4_ss_movestate(
 689                                             statedir, destdir, dep->d_name);
 690                                 } else {
 691                                         cl_ss->ss_pn = ss_pn;
 692                                 }
 693                                 insque(cl_ss, oldstate);
 694                         } else {
 695                                 rfs4_ss_pnfree(ss_pn);
 696                         }
 697                 }
 698         }
 699 
 700 out:
 701         (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
 702         VN_RELE(dvp);
 703         if (dirt)
 704                 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
 705 }
 706 
 707 static void
 708 rfs4_ss_init(void)
 709 {
 710         int npaths = 1;
 711         char *default_dss_path = NFS4_DSS_VAR_DIR;
 712 
 713         /* read the default stable storage state */
 714         rfs4_dss_readstate(npaths, &default_dss_path);
 715 
 716         rfs4_ss_enabled = 1;
 717 }
 718 
 719 static void
 720 rfs4_ss_fini(void)
 721 {
 722         rfs4_servinst_t *sip;
 723 
 724         mutex_enter(&rfs4_servinst_lock);
 725         sip = rfs4_cur_servinst;
 726         while (sip != NULL) {
 727                 rfs4_dss_clear_oldstate(sip);
 728                 sip = sip->next;
 729         }
 730         mutex_exit(&rfs4_servinst_lock);
 731 }
 732 
 733 /*
 734  * Remove all oldstate files referenced by this servinst.
 735  */
 736 static void
 737 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
 738 {
 739         rfs4_oldstate_t *os_head, *osp;
 740 
 741         rw_enter(&sip->oldstate_lock, RW_WRITER);
 742         os_head = sip->oldstate;
 743 
 744         if (os_head == NULL) {
 745                 rw_exit(&sip->oldstate_lock);
 746                 return;
 747         }
 748 
 749         /* skip dummy entry */
 750         osp = os_head->next;


 754 
 755                 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
 756 
 757                 if (osp->cl_id4.id_val)
 758                         kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
 759                 rfs4_ss_pnfree(osp->ss_pn);
 760 
 761                 os_next = osp->next;
 762                 remque(osp);
 763                 kmem_free(osp, sizeof (rfs4_oldstate_t));
 764                 osp = os_next;
 765         }
 766 
 767         rw_exit(&sip->oldstate_lock);
 768 }
 769 
 770 /*
 771  * Form the state and oldstate paths, and read in the stable storage files.
 772  */
 773 void
 774 rfs4_dss_readstate(int npaths, char **paths)
 775 {
 776         int i;
 777         char *state, *oldstate;
 778 
 779         state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 780         oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 781 
 782         for (i = 0; i < npaths; i++) {
 783                 char *path = paths[i];
 784 
 785                 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
 786                 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
 787 
 788                 /*
 789                  * Populate the current server instance's oldstate list.
 790                  *
 791                  * 1. Read stable storage data from old state directory,
 792                  *    leaving its contents alone.
 793                  *
 794                  * 2. Read stable storage data from state directory,
 795                  *    and move the latter's contents to old state
 796                  *    directory.
 797                  */
 798                 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL);
 799                 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate);


 800         }
 801 
 802         kmem_free(state, MAXPATHLEN);
 803         kmem_free(oldstate, MAXPATHLEN);
 804 }
 805 
 806 
 807 /*
 808  * Check if we are still in grace and if the client can be
 809  * granted permission to perform reclaims.
 810  */
 811 void
 812 rfs4_ss_chkclid(rfs4_client_t *cp)
 813 {
 814         rfs4_servinst_t *sip;
 815 
 816         /*
 817          * It should be sufficient to check the oldstate data for just
 818          * this client's instance. However, since our per-instance
 819          * client grouping is solely temporal, HA-NFSv4 RG failover
 820          * might result in clients of the same RG being partitioned into
 821          * separate instances.
 822          *
 823          * Until the client grouping is improved, we must check the
 824          * oldstate data for all instances with an active grace period.
 825          *
 826          * This also serves as the mechanism to remove stale oldstate data.
 827          * The first time we check an instance after its grace period has
 828          * expired, the oldstate data should be cleared.
 829          *
 830          * Start at the current instance, and walk the list backwards
 831          * to the first.
 832          */
 833         mutex_enter(&rfs4_servinst_lock);
 834         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 835                 rfs4_ss_chkclid_sip(cp, sip);
 836 
 837                 /* if the above check found this client, we're done */
 838                 if (cp->rc_can_reclaim)
 839                         break;
 840         }
 841         mutex_exit(&rfs4_servinst_lock);
 842 }
 843 
 844 static void
 845 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
 846 {
 847         rfs4_oldstate_t *osp, *os_head;
 848 
 849         /* short circuit everything if this server instance has no oldstate */
 850         rw_enter(&sip->oldstate_lock, RW_READER);
 851         os_head = sip->oldstate;
 852         rw_exit(&sip->oldstate_lock);
 853         if (os_head == NULL)
 854                 return;
 855 
 856         /*
 857          * If this server instance is no longer in a grace period then
 858          * the client won't be able to reclaim. No further need for this
 859          * instance's oldstate data, so it can be cleared.
 860          */
 861         if (!rfs4_servinst_in_grace(sip))


 871         while (osp != os_head) {
 872                 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
 873                         if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
 874                             osp->cl_id4.id_len) == 0) {
 875                                 cp->rc_can_reclaim = 1;
 876                                 break;
 877                         }
 878                 }
 879                 osp = osp->next;
 880         }
 881 
 882         rw_exit(&sip->oldstate_lock);
 883 }
 884 
 885 /*
 886  * Place client information into stable storage: 1/3.
 887  * First, generate the leaf filename, from the client's IP address and
 888  * the server-generated short-hand clientid.
 889  */
 890 void
 891 rfs4_ss_clid(rfs4_client_t *cp)
 892 {
 893         const char *kinet_ntop6(uchar_t *, char *, size_t);
 894         char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
 895         struct sockaddr *ca;
 896         uchar_t *b;
 897 
 898         if (rfs4_ss_enabled == 0) {
 899                 return;
 900         }
 901 
 902         buf[0] = 0;
 903 
 904         ca = (struct sockaddr *)&cp->rc_addr;
 905 
 906         /*
 907          * Convert the caller's IP address to a dotted string
 908          */
 909         if (ca->sa_family == AF_INET) {
 910                 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
 911                 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
 912                     b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
 913         } else if (ca->sa_family == AF_INET6) {
 914                 struct sockaddr_in6 *sin6;
 915 
 916                 sin6 = (struct sockaddr_in6 *)ca;
 917                 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
 918                     buf, INET6_ADDRSTRLEN);
 919         }
 920 
 921         (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
 922             (longlong_t)cp->rc_clientid);
 923         rfs4_ss_clid_write(cp, leaf);
 924 }
 925 
 926 /*
 927  * Place client information into stable storage: 2/3.
 928  * DSS: distributed stable storage: the file may need to be written to
 929  * multiple directories.
 930  */
 931 static void
 932 rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
 933 {
 934         rfs4_servinst_t *sip;
 935 
 936         /*
 937          * It should be sufficient to write the leaf file to (all) DSS paths
 938          * associated with just this client's instance. However, since our
 939          * per-instance client grouping is solely temporal, HA-NFSv4 RG
 940          * failover might result in us losing DSS data.
 941          *
 942          * Until the client grouping is improved, we must write the DSS data
 943          * to all instances' paths. Start at the current instance, and
 944          * walk the list backwards to the first.
 945          */
 946         mutex_enter(&rfs4_servinst_lock);
 947         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 948                 int i, npaths = sip->dss_npaths;
 949 
 950                 /* write the leaf file to all DSS paths */
 951                 for (i = 0; i < npaths; i++) {
 952                         rfs4_dss_path_t *dss_path = sip->dss_paths[i];
 953 
 954                         /* HA-NFSv4 path might have been failed-away from us */
 955                         if (dss_path == NULL)
 956                                 continue;
 957 
 958                         rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
 959                 }
 960         }
 961         mutex_exit(&rfs4_servinst_lock);
 962 }
 963 
 964 /*
 965  * Place client information into stable storage: 3/3.
 966  * Write the stable storage data to the requested file.
 967  */
 968 static void
 969 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
 970 {
 971         int ioflag;
 972         int file_vers = NFS4_SS_VERSION;
 973         size_t dirlen;
 974         struct uio uio;
 975         struct iovec iov[4];
 976         char *dir;
 977         rfs4_ss_pn_t *ss_pn;
 978         vnode_t *vp;
 979         nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
 980 
 981         /* allow 2 extra bytes for '/' & NUL */


1134                  * for forced expiration
1135                  */
1136                 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1137                         cp->rc_forced_expire = 1;
1138                 }
1139                 break;
1140 
1141         default:
1142                 /* force this assert to fail */
1143                 ASSERT(clr->addr_type != clr->addr_type);
1144         }
1145 }
1146 
1147 /*
1148  * This is called from nfssys() in order to clear server state
1149  * for the specified client IP Address.
1150  */
1151 void
1152 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1153 {
1154         (void) rfs4_dbe_walk(rfs4_client_tab, rfs4_client_scrub, clr);


1155 }
1156 
1157 /*
1158  * Used to initialize the NFSv4 server's state or database.  All of
1159  * the tables are created and timers are set. Only called when NFSv4
1160  * service is provided.
1161  */
1162 void
1163 rfs4_state_init()
1164 {
1165         int start_grace;
1166         extern boolean_t rfs4_cpr_callb(void *, int);
1167         char *dss_path = NFS4_DSS_VAR_DIR;
1168         time_t start_time;




1169 
1170         mutex_enter(&rfs4_state_lock);


























1171 












1172         /*
1173          * If the server state database has already been initialized,
1174          * skip it
1175          */
1176         if (rfs4_server_state != NULL) {
1177                 mutex_exit(&rfs4_state_lock);
1178                 return;






1179         }
1180 
1181         rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);







1182 



















1183         /*
1184          * Set the boot time.  If the server
1185          * has been restarted quickly and has had the opportunity to
1186          * service clients, then the start_time needs to be bumped
1187          * regardless.  A small window but it exists...
1188          */
1189         start_time = gethrestime_sec();
1190         if (rfs4_start_time < start_time)
1191                 rfs4_start_time = start_time;
1192         else
1193                 rfs4_start_time++;
1194 
1195         /* DSS: distributed stable storage: initialise served paths list */
1196         rfs4_dss_pathlist = NULL;
1197 
1198         /*
1199          * Create the first server instance, or a new one if the server has
1200          * been restarted; see above comments on rfs4_start_time. Don't
1201          * start its grace period; that will be done later, to maximise the
1202          * clients' recovery window.
1203          */
1204         start_grace = 0;
1205         rfs4_servinst_create(start_grace, 1, &dss_path);



















1206 
1207         /* reset the "first NFSv4 request" status */
1208         rfs4_seen_first_compound = 0;
1209 


1210         /*
1211          * Add a CPR callback so that we can update client
1212          * access times to extend the lease after a suspend
1213          * and resume (using the same class as rpcmod/connmgr)
1214          */
1215         cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");



1216 


1217         /* set the various cache timers for table creation */
1218         if (rfs4_client_cache_time == 0)
1219                 rfs4_client_cache_time = CLIENT_CACHE_TIME;
1220         if (rfs4_openowner_cache_time == 0)
1221                 rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1222         if (rfs4_state_cache_time == 0)
1223                 rfs4_state_cache_time = STATE_CACHE_TIME;
1224         if (rfs4_lo_state_cache_time == 0)
1225                 rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1226         if (rfs4_lockowner_cache_time == 0)
1227                 rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1228         if (rfs4_file_cache_time == 0)
1229                 rfs4_file_cache_time = FILE_CACHE_TIME;
1230         if (rfs4_deleg_state_cache_time == 0)
1231                 rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1232 
1233         /* Create the overall database to hold all server state */
1234         rfs4_server_state = rfs4_database_create(rfs4_database_debug);
1235 
1236         /* Now create the individual tables */
1237         rfs4_client_cache_time *= rfs4_lease_time;
1238         rfs4_client_tab = rfs4_table_create(rfs4_server_state,
1239             "Client",
1240             rfs4_client_cache_time,
1241             2,
1242             rfs4_client_create,
1243             rfs4_client_destroy,
1244             rfs4_client_expiry,
1245             sizeof (rfs4_client_t),
1246             TABSIZE,
1247             MAXTABSZ/8, 100);
1248         rfs4_nfsclnt_idx = rfs4_index_create(rfs4_client_tab,
1249             "nfs_client_id4", nfsclnt_hash,
1250             nfsclnt_compare, nfsclnt_mkkey,
1251             TRUE);
1252         rfs4_clientid_idx = rfs4_index_create(rfs4_client_tab,
1253             "client_id", clientid_hash,
1254             clientid_compare, clientid_mkkey,
1255             FALSE);
1256 
1257         rfs4_clntip_cache_time = 86400 * 365;   /* about a year */
1258         rfs4_clntip_tab = rfs4_table_create(rfs4_server_state,
1259             "ClntIP",
1260             rfs4_clntip_cache_time,
1261             1,
1262             rfs4_clntip_create,
1263             rfs4_clntip_destroy,
1264             rfs4_clntip_expiry,
1265             sizeof (rfs4_clntip_t),
1266             TABSIZE,
1267             MAXTABSZ, 100);
1268         rfs4_clntip_idx = rfs4_index_create(rfs4_clntip_tab,
1269             "client_ip", clntip_hash,
1270             clntip_compare, clntip_mkkey,
1271             TRUE);
1272 
1273         rfs4_openowner_cache_time *= rfs4_lease_time;
1274         rfs4_openowner_tab = rfs4_table_create(rfs4_server_state,
1275             "OpenOwner",
1276             rfs4_openowner_cache_time,
1277             1,
1278             rfs4_openowner_create,
1279             rfs4_openowner_destroy,
1280             rfs4_openowner_expiry,
1281             sizeof (rfs4_openowner_t),
1282             TABSIZE,
1283             MAXTABSZ, 100);
1284         rfs4_openowner_idx = rfs4_index_create(rfs4_openowner_tab,
1285             "open_owner4", openowner_hash,
1286             openowner_compare,
1287             openowner_mkkey, TRUE);
1288 
1289         rfs4_state_cache_time *= rfs4_lease_time;
1290         rfs4_state_tab = rfs4_table_create(rfs4_server_state,
1291             "OpenStateID",
1292             rfs4_state_cache_time,
1293             3,
1294             rfs4_state_create,
1295             rfs4_state_destroy,
1296             rfs4_state_expiry,
1297             sizeof (rfs4_state_t),
1298             TABSIZE,
1299             MAXTABSZ, 100);
1300 
1301         rfs4_state_owner_file_idx = rfs4_index_create(rfs4_state_tab,

1302             "Openowner-File",
1303             state_owner_file_hash,
1304             state_owner_file_compare,
1305             state_owner_file_mkkey, TRUE);
1306 
1307         rfs4_state_idx = rfs4_index_create(rfs4_state_tab,
1308             "State-id", state_hash,
1309             state_compare, state_mkkey, FALSE);
1310 
1311         rfs4_state_file_idx = rfs4_index_create(rfs4_state_tab,
1312             "File", state_file_hash,
1313             state_file_compare, state_file_mkkey,
1314             FALSE);
1315 
1316         rfs4_lo_state_cache_time *= rfs4_lease_time;
1317         rfs4_lo_state_tab = rfs4_table_create(rfs4_server_state,
1318             "LockStateID",
1319             rfs4_lo_state_cache_time,
1320             2,
1321             rfs4_lo_state_create,
1322             rfs4_lo_state_destroy,
1323             rfs4_lo_state_expiry,
1324             sizeof (rfs4_lo_state_t),
1325             TABSIZE,
1326             MAXTABSZ, 100);
1327 
1328         rfs4_lo_state_owner_idx = rfs4_index_create(rfs4_lo_state_tab,

1329             "lockownerxstate",
1330             lo_state_lo_hash,
1331             lo_state_lo_compare,
1332             lo_state_lo_mkkey, TRUE);
1333 
1334         rfs4_lo_state_idx = rfs4_index_create(rfs4_lo_state_tab,
1335             "State-id",
1336             lo_state_hash, lo_state_compare,
1337             lo_state_mkkey, FALSE);
1338 
1339         rfs4_lockowner_cache_time *= rfs4_lease_time;
1340 
1341         rfs4_lockowner_tab = rfs4_table_create(rfs4_server_state,
1342             "Lockowner",
1343             rfs4_lockowner_cache_time,
1344             2,
1345             rfs4_lockowner_create,
1346             rfs4_lockowner_destroy,
1347             rfs4_lockowner_expiry,
1348             sizeof (rfs4_lockowner_t),
1349             TABSIZE,
1350             MAXTABSZ, 100);
1351 
1352         rfs4_lockowner_idx = rfs4_index_create(rfs4_lockowner_tab,
1353             "lock_owner4", lockowner_hash,
1354             lockowner_compare,
1355             lockowner_mkkey, TRUE);
1356 
1357         rfs4_lockowner_pid_idx = rfs4_index_create(rfs4_lockowner_tab,

1358             "pid", pid_hash,
1359             pid_compare, pid_mkkey,
1360             FALSE);
1361 
1362         rfs4_file_cache_time *= rfs4_lease_time;
1363         rfs4_file_tab = rfs4_table_create(rfs4_server_state,
1364             "File",
1365             rfs4_file_cache_time,
1366             1,
1367             rfs4_file_create,
1368             rfs4_file_destroy,
1369             NULL,
1370             sizeof (rfs4_file_t),
1371             TABSIZE,
1372             MAXTABSZ, -1);
1373 
1374         rfs4_file_idx = rfs4_index_create(rfs4_file_tab,
1375             "Filehandle", file_hash,
1376             file_compare, file_mkkey, TRUE);
1377 
1378         rfs4_deleg_state_cache_time *= rfs4_lease_time;
1379         rfs4_deleg_state_tab = rfs4_table_create(rfs4_server_state,

1380             "DelegStateID",
1381             rfs4_deleg_state_cache_time,
1382             2,
1383             rfs4_deleg_state_create,
1384             rfs4_deleg_state_destroy,
1385             rfs4_deleg_state_expiry,
1386             sizeof (rfs4_deleg_state_t),
1387             TABSIZE,
1388             MAXTABSZ, 100);
1389         rfs4_deleg_idx = rfs4_index_create(rfs4_deleg_state_tab,
1390             "DelegByFileClient",
1391             deleg_hash,
1392             deleg_compare,
1393             deleg_mkkey, TRUE);
1394 
1395         rfs4_deleg_state_idx = rfs4_index_create(rfs4_deleg_state_tab,

1396             "DelegState",
1397             deleg_state_hash,
1398             deleg_state_compare,
1399             deleg_state_mkkey, FALSE);
1400 


1401         /*
1402          * Init the stable storage.
1403          */
1404         rfs4_ss_init();
1405 
1406         rfs4_client_clrst = rfs4_clear_client_state;
1407 
1408         mutex_exit(&rfs4_state_lock);
1409 }
1410 
1411 
1412 /*
1413  * Used at server shutdown to cleanup all of the NFSv4 server's structures
1414  * and other state.
      *
      * Returns immediately if rfs4_server_state is NULL.  Otherwise, while
      * holding rfs4_state_lock, it clears the rfs4_client_clrst hook, sets
      * the delegation policy to SRV_NEVER_DELEGATE, detaches the database
      * from rfs4_server_state, deletes the CPR callback (if cpr_id is set),
      * destroys rfs4_findclient_lock, shuts the database down, finalizes
      * stable storage, destroys the database and zeroes the cache timers.
      * After dropping the lock it destroys the server instances, resets
      * rfs4_seen_first_compound and frees both DSS path nvlists.
1415  */
1416 void
1417 rfs4_state_fini()
1418 {
1419         rfs4_database_t *dbp;


1420 
1421         mutex_enter(&rfs4_state_lock);
1422 
             /* No state database is installed, so there is nothing to undo. */
1423         if (rfs4_server_state == NULL) {
1424                 mutex_exit(&rfs4_state_lock);










1425                 return;
1426         }
1427 
1428         rfs4_client_clrst = NULL;

1429 
1430         rfs4_set_deleg_policy(SRV_NEVER_DELEGATE);
             /* Keep a private pointer and clear the global before teardown. */
1431         dbp = rfs4_server_state;
1432         rfs4_server_state = NULL;
1433 
1434         /*
1435          * Cleanup the CPR callback.
1436          */
1437         if (cpr_id)
1438                 (void) callb_delete(cpr_id);
1439 
1440         rw_destroy(&rfs4_findclient_lock);
1441 
1442         /* First stop all of the reaper threads in the database */
1443         rfs4_database_shutdown(dbp);
1444         /* clean up any dangling stable storage structures */
1445         rfs4_ss_fini();
1446         /* Now actually destroy/release the database and its tables */






1447         rfs4_database_destroy(dbp);
1448 
1449         /* Reset the cache timers for next time */
1450         rfs4_client_cache_time = 0;
1451         rfs4_openowner_cache_time = 0;
1452         rfs4_state_cache_time = 0;
1453         rfs4_lo_state_cache_time = 0;
1454         rfs4_lockowner_cache_time = 0;
1455         rfs4_file_cache_time = 0;
1456         rfs4_deleg_state_cache_time = 0;
1457 
1458         mutex_exit(&rfs4_state_lock);
1459 
             /* The remaining cleanup runs without rfs4_state_lock held. */
1460         /* destroy server instances and current instance ptr */
1461         rfs4_servinst_destroy_all();
1462 
1463         /* reset the "first NFSv4 request" status */
1464         rfs4_seen_first_compound = 0;
1465 
1466         /* DSS: distributed stable storage */
1467         nvlist_free(rfs4_dss_oldpaths);
1468         nvlist_free(rfs4_dss_paths);
1469         rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1470 }
1471 
/*
 * Overlay that lets a clientid4 be accessed as two 32-bit fields,
 * start_time and c_id, through the impl_id member.
 */
1472 typedef union {
1473         struct {
1474                 uint32_t start_time;
1475                 uint32_t c_id;
1476         } impl_id;
1477         clientid4 id4;
1478 } cid;
1479 
1480 static int foreign_stateid(stateid_t *id);
1481 static int foreign_clientid(cid *cidp);
1482 static void embed_nodeid(cid *cidp);
1483 
1484 typedef union {
1485         struct {
1486                 uint32_t c_id;
1487                 uint32_t gen_num;
1488         } cv_impl;
1489         verifier4       confirm_verf;


1564          * If the sysadmin has used clear_locks for this
1565          * entry then forced_expire will be set and we
1566          * want this entry to be reaped. Or the entry
1567          * has exceeded its lease period.
1568          */
1569         cp_expired = (cp->rc_forced_expire ||
1570             (gethrestime_sec() - cp->rc_last_access
1571             > rfs4_lease_time));
1572 
1573         if (!cp->rc_ss_remove && cp_expired)
1574                 cp->rc_ss_remove = 1;
1575         return (cp_expired);
1576 }
1577 
1578 /*
1579  * Remove the leaf file from all distributed stable storage paths.
      *
      * The leaf name comes from cp->rc_ss_pn->leaf.  With rfs4_servinst_lock
      * held, each server instance reachable from rfs4_cur_servinst through
      * the prev links is passed to rfs4_dss_remove_leaf() together with
      * NFS4_DSS_STATE_LEAF as the directory leaf.
1580  */
1581 static void
1582 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1583 {

1584         rfs4_servinst_t *sip;
1585         char *leaf = cp->rc_ss_pn->leaf;
1586 
1587         /*
1588          * since the state files are written to all DSS
1589          * paths we must remove this leaf file instance
1590          * from all server instances.
1591          */
1592 
1593         mutex_enter(&rfs4_servinst_lock);
1594         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {

1595                 /* remove the leaf file associated with this server instance */
1596                 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1597         }
1598         mutex_exit(&rfs4_servinst_lock);
1599 }
1600 
1601 static void
1602 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1603 {
1604         int i, npaths = sip->dss_npaths;
1605 
1606         for (i = 0; i < npaths; i++) {
1607                 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1608                 char *path, *dir;
1609                 size_t pathlen;
1610 
1611                 /* the HA-NFSv4 path might have been failed-over away from us */
1612                 if (dss_path == NULL)
1613                         continue;
1614 
1615                 dir = dss_path->path;
1616 
1617                 /* allow 3 extra bytes for two '/' & a NUL */
1618                 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;


1646                 if (cp->rc_ss_remove)
1647                         rfs4_dss_remove_cpleaf(cp);
1648                 rfs4_ss_pnfree(cp->rc_ss_pn);
1649         }
1650 
1651         /* Free the client supplied client id */
1652         kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1653 
1654         if (cp->rc_sysidt != LM_NOSYSID)
1655                 lm_free_sysidt(cp->rc_sysidt);
1656 }
1657 
1658 static bool_t
1659 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1660 {
1661         rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1662         nfs_client_id4 *client = (nfs_client_id4 *)arg;
1663         struct sockaddr *ca;
1664         cid *cidp;
1665         scid_confirm_verf *scvp;

1666 


1667         /* Get a clientid to give to the client */
1668         cidp = (cid *)&cp->rc_clientid;
1669         cidp->impl_id.start_time = rfs4_start_time;
1670         cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1671 
1672         /* If we are booted as a cluster node, embed our nodeid */
1673         if (cluster_bootflags & CLUSTER_BOOTED)
1674                 embed_nodeid(cidp);
1675 
1676         /* Allocate and copy client's client id value */
1677         cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1678         cp->rc_nfs_client.id_len = client->id_len;
1679         bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1680         cp->rc_nfs_client.verifier = client->verifier;
1681 
1682         /* Copy client's IP address */
1683         ca = client->cl_addr;
1684         if (ca->sa_family == AF_INET)
1685                 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1686         else if (ca->sa_family == AF_INET6)
1687                 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1688         cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1689 


1707 
1708         cp->rc_cr_set = NULL;
1709 
1710         cp->rc_sysidt = LM_NOSYSID;
1711 
1712         list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1713             offsetof(rfs4_openowner_t, ro_node));
1714 
1715         /* set up the callback control structure */
1716         cp->rc_cbinfo.cb_state = CB_UNINIT;
1717         mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1718         cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1719         cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1720 
1721         /*
1722          * Associate the client_t with the current server instance.
1723          * The hold is solely to satisfy the calling requirement of
1724          * rfs4_servinst_assign(). In this case it's not strictly necessary.
1725          */
1726         rfs4_dbe_hold(cp->rc_dbe);
1727         rfs4_servinst_assign(cp, rfs4_cur_servinst);
1728         rfs4_dbe_rele(cp->rc_dbe);
1729 
1730         return (TRUE);
1731 }
1732 
1733 /*
1734  * Caller wants to generate/update the setclientid_confirm verifier
1735  * associated with a client.  This is done during the SETCLIENTID
1736  * processing.
      *
      * The verifier stored in cp->rc_confirm_verf is viewed through the
      * scid_confirm_verf overlay and its gen_num field is incremented by one.
      * No lock is taken here.
1737  */
1738 void
1739 rfs4_client_scv_next(rfs4_client_t *cp)
1740 {
1741         scid_confirm_verf *scvp;
1742 
1743         /* Init the value for the SETCLIENTID_CONFIRM verifier */
1744         scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1745         scvp->cv_impl.gen_num++;
1746 }
1747 
/*
 * Release the client's database entry by calling rfs4_dbe_rele() on
 * cp->rc_dbe.
 */
1748 void
1749 rfs4_client_rele(rfs4_client_t *cp)
1750 {
1751         rfs4_dbe_rele(cp->rc_dbe);
1752 }
1753 
/*
 * Look up a client in rfs4_nfsclnt_idx.  The nfs_client_id4 is used both
 * as the search key and as the creation argument, and "create" is handed
 * to rfs4_dbsearch() unchanged.
 *
 * When oldcp is non-NULL, rfs4_findclient_lock is taken as writer and
 * oldcp's database entry is hidden for the duration of the search, then
 * unhidden.  Otherwise the lock is taken as reader only.
 *
 * Returns whatever rfs4_dbsearch() returned.
 */
1754 rfs4_client_t *
1755 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1756 {
1757         rfs4_client_t *cp;


1758 
1759 
1760         if (oldcp) {
1761                 rw_enter(&rfs4_findclient_lock, RW_WRITER);
1762                 rfs4_dbe_hide(oldcp->rc_dbe);
1763         } else {
1764                 rw_enter(&rfs4_findclient_lock, RW_READER);
1765         }
1766 
1767         cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_nfsclnt_idx, client,
1768             create, (void *)client, RFS4_DBS_VALID);
1769 
             /* Make the old entry visible again before dropping the lock. */
1770         if (oldcp)
1771                 rfs4_dbe_unhide(oldcp->rc_dbe);
1772 
1773         rw_exit(&rfs4_findclient_lock);
1774 
1775         return (cp);
1776 }
1777 
/*
 * Look up an existing client by clientid4 in rfs4_clientid_idx; a new
 * entry is never created (create is FALSE).
 *
 * Returns NULL when:
 *  - we are booted as a cluster node and foreign_clientid() flags the id,
 *  - the search finds nothing, or
 *  - the entry found has rc_need_confirm set and find_unconfirmed is
 *    FALSE (the entry is released with rfs4_client_rele() first).
 * Otherwise the entry found is returned.
 */
1778 rfs4_client_t *
1779 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1780 {
1781         rfs4_client_t *cp;
1782         bool_t create = FALSE;
1783         cid *cidp = (cid *)&clientid;

1784 
1785         /* If we're a cluster and the nodeid isn't right, short-circuit */
1786         if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1787                 return (NULL);
1788 
1789         rw_enter(&rfs4_findclient_lock, RW_READER);
1790 
1791         cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, &clientid,
1792             &create, NULL, RFS4_DBS_VALID);
1793 
1794         rw_exit(&rfs4_findclient_lock);
1795 
             /* Unconfirmed clients are only handed out on request. */
1796         if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1797                 rfs4_client_rele(cp);
1798                 return (NULL);
1799         } else {
1800                 return (cp);
1801         }
1802 }
1803 
1804 static uint32_t
1805 clntip_hash(void *key)
1806 {
1807         struct sockaddr *addr = key;
1808         int i, len = 0;
1809         uint32_t hash = 0;
1810         char *ptr;
1811 
1812         if (addr->sa_family == AF_INET) {
1813                 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1814                 len = sizeof (struct in_addr);


1882 {
1883         rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1884         struct sockaddr *ca = (struct sockaddr *)arg;
1885 
1886         /* Copy client's IP address */
1887         if (ca->sa_family == AF_INET)
1888                 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1889         else if (ca->sa_family == AF_INET6)
1890                 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1891         else
1892                 return (FALSE);
1893         cp->ri_no_referrals = 1;
1894 
1895         return (TRUE);
1896 }
1897 
/*
 * Search rfs4_clntip_idx for the given address while holding
 * rfs4_findclient_lock as reader.  addr is both the search key and the
 * creation argument; "create" is passed through to rfs4_dbsearch().
 * Returns the search result.
 */
1898 rfs4_clntip_t *
1899 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1900 {
1901         rfs4_clntip_t *cp;

1902 
1903         rw_enter(&rfs4_findclient_lock, RW_READER);
1904 
1905         cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,


1906             create, addr, RFS4_DBS_VALID);
1907 
1908         rw_exit(&rfs4_findclient_lock);
1909 
1910         return (cp);
1911 }
1912 
/*
 * Look up the existing clntip entry for addr (never creating one) and, if
 * one is found, invalidate its database entry and then call
 * rfs4_dbe_rele() on it.  rfs4_findclient_lock is held as reader for the
 * whole operation.  Nothing is done when no entry is found.
 */
1913 void
1914 rfs4_invalidate_clntip(struct sockaddr *addr)
1915 {
1916         rfs4_clntip_t *cp;
1917         bool_t create = FALSE;

1918 
1919         rw_enter(&rfs4_findclient_lock, RW_READER);
1920 
1921         cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
1922             &create, NULL, RFS4_DBS_VALID);
1923         if (cp == NULL) {
1924                 rw_exit(&rfs4_findclient_lock);
1925                 return;
1926         }
1927         rfs4_dbe_invalidate(cp->ri_dbe);
1928         rfs4_dbe_rele(cp->ri_dbe);
1929 
1930         rw_exit(&rfs4_findclient_lock);
1931 }
1932 
1933 bool_t
1934 rfs4_lease_expired(rfs4_client_t *cp)
1935 {
1936         bool_t rc;
1937 
1938         rfs4_dbe_lock(cp->rc_dbe);
1939 
1940         /*
1941          * If the admin has executed clear_locks for this
1942          * client id, force expire will be set, so no need
1943          * to calculate anything because it's "outa here".
1944          */
1945         if (cp->rc_forced_expire) {
1946                 rc = TRUE;
1947         } else {
1948                 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
1949         }
1950 


2058 
2059         /* Free the open owner id */
2060         kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2061 }
2062 
/*
 * Release the openowner's database entry by calling rfs4_dbe_rele() on
 * oo->ro_dbe.
 */
2063 void
2064 rfs4_openowner_rele(rfs4_openowner_t *oo)
2065 {
2066         rfs4_dbe_rele(oo->ro_dbe);
2067 }
2068 
/*
 * Initialize a new openowner entry (presumably the create callback of the
 * openowner table -- confirm against the table setup).
 *
 * arg points at an rfs4_openowner_t whose ro_owner and ro_open_seqid carry
 * the open_owner4 and seqid supplied by the caller.  The owning client is
 * looked up by clientid in rfs4_clientid_idx without creating one; FALSE
 * is returned if it is not found.  Otherwise the owner bytes are copied
 * into newly allocated memory, the remaining fields are initialized, the
 * client found is stored in ro_client, the openowner is appended to the
 * client's rc_openownerlist under the client's dbe lock, and TRUE is
 * returned.
 *
 * NOTE(review): listing line 2106 is not shown here and no visible line
 * uses the local "seqid" -- check the full source.
 */
2069 static bool_t
2070 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2071 {
2072         rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2073         rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2074         open_owner4 *openowner = &argp->ro_owner;
2075         seqid4 seqid = argp->ro_open_seqid;
2076         rfs4_client_t *cp;
2077         bool_t create = FALSE;

2078 
2079         rw_enter(&rfs4_findclient_lock, RW_READER);
2080 
2081         cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2082             &openowner->clientid,
2083             &create, NULL, RFS4_DBS_VALID);
2084 
2085         rw_exit(&rfs4_findclient_lock);
2086 
             /* No such client: refuse to create the openowner. */
2087         if (cp == NULL)
2088                 return (FALSE);
2089 
2090         oo->ro_reply_fh.nfs_fh4_len = 0;
2091         oo->ro_reply_fh.nfs_fh4_val = NULL;
2092 
             /* Take a private copy of the owner bytes supplied by the caller. */
2093         oo->ro_owner.clientid = openowner->clientid;
2094         oo->ro_owner.owner_val =
2095             kmem_alloc(openowner->owner_len, KM_SLEEP);
2096 
2097         bcopy(openowner->owner_val,
2098             oo->ro_owner.owner_val, openowner->owner_len);
2099 
2100         oo->ro_owner.owner_len = openowner->owner_len;
2101 
2102         oo->ro_need_confirm = TRUE;
2103 
2104         rfs4_sw_init(&oo->ro_sw);
2105 


2107         bzero(&oo->ro_reply, sizeof (nfs_resop4));
2108         oo->ro_client = cp;
2109         oo->ro_cr_set = NULL;
2110 
2111         list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2112             offsetof(rfs4_state_t, rs_node));
2113 
2114         /* Insert openowner into client's open owner list */
2115         rfs4_dbe_lock(cp->rc_dbe);
2116         list_insert_tail(&cp->rc_openownerlist, oo);
2117         rfs4_dbe_unlock(cp->rc_dbe);
2118 
2119         return (TRUE);
2120 }
2121 
/*
 * Search rfs4_openowner_idx for the given open_owner4.
 *
 * The creation argument is a stack rfs4_openowner_t of which only ro_owner
 * (a copy of *openowner) and ro_open_seqid (seqid) are filled in; "create"
 * is passed through to rfs4_dbsearch().  Returns the search result.
 */
2122 rfs4_openowner_t *
2123 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2124 {
2125         rfs4_openowner_t *oo;
2126         rfs4_openowner_t arg;

2127 
2128         arg.ro_owner = *openowner;
2129         arg.ro_open_seqid = seqid;
2130         oo = (rfs4_openowner_t *)rfs4_dbsearch(rfs4_openowner_idx, openowner,

2131             create, &arg, RFS4_DBS_VALID);
2132 
2133         return (oo);
2134 }
2135 
/*
 * Increment the openowner's ro_open_seqid by one while holding the
 * openowner's dbe lock.
 */
2136 void
2137 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2138 {
2139 
2140         rfs4_dbe_lock(oo->ro_dbe);
2141 
2142         oo->ro_open_seqid++;
2143 
2144         rfs4_dbe_unlock(oo->ro_dbe);
2145 }
2146 
2147 void
2148 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2149 {
2150 


2253 }
2254 
/*
 * Expiry check for a lockowner entry: unconditionally returns TRUE and
 * does not look at u_entry.
 */
2255 /* ARGSUSED */
2256 static bool_t
2257 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2258 {
2259         /*
2260          * Since expiry is called with no other references on
2261          * this struct, go ahead and have it removed.
2262          */
2263         return (TRUE);
2264 }
2265 
/*
 * Initialize a new lockowner entry from the lock_owner4 passed as arg.
 *
 * The owning client is looked up by clientid in rfs4_clientid_idx without
 * creating one; FALSE is returned if it is not found.  Otherwise the
 * client found is stored in rl_client, the owner bytes are copied into
 * newly allocated memory, rl_pid is set from the entry's database id, and
 * TRUE is returned.
 */
2266 static bool_t
2267 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2268 {
2269         rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2270         lock_owner4 *lockowner = (lock_owner4 *)arg;
2271         rfs4_client_t *cp;
2272         bool_t create = FALSE;

2273 
2274         rw_enter(&rfs4_findclient_lock, RW_READER);
2275 
2276         cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2277             &lockowner->clientid,
2278             &create, NULL, RFS4_DBS_VALID);
2279 
2280         rw_exit(&rfs4_findclient_lock);
2281 
             /* No such client: refuse to create the lockowner. */
2282         if (cp == NULL)
2283                 return (FALSE);
2284 
2285         /* Reference client */
2286         lo->rl_client = cp;
2287         lo->rl_owner.clientid = lockowner->clientid;
2288         lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2289         bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2290             lockowner->owner_len);
2291         lo->rl_owner.owner_len = lockowner->owner_len;
             /* The database entry id is used as this lockowner's pid. */
2292         lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2293 
2294         return (TRUE);
2295 }
2296 
/*
 * Search rfs4_lockowner_idx for the given lock_owner4, which serves as
 * both the search key and the creation argument.  "create" is passed
 * through to rfs4_dbsearch().  Returns the search result.
 */
2297 rfs4_lockowner_t *
2298 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2299 {
2300         rfs4_lockowner_t *lo;

2301 
2302         lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_idx, lockowner,

2303             create, lockowner, RFS4_DBS_VALID);
2304 
2305         return (lo);
2306 }
2307 
/*
 * Search rfs4_lockowner_pid_idx for an existing lockowner, using the pid
 * value itself (cast to a pointer) as the key.  Never creates an entry.
 * Returns the search result.
 */
2308 rfs4_lockowner_t *
2309 rfs4_findlockowner_by_pid(pid_t pid)
2310 {
2311         rfs4_lockowner_t *lo;
2312         bool_t create = FALSE;

2313 
2314         lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_pid_idx,
2315             (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2316 
2317         return (lo);
2318 }
2319 
2320 
/*
 * Hash function for the file index: applies ADDRHASH() to the key
 * pointer itself.
 */
2321 static uint32_t
2322 file_hash(void *key)
2323 {
2324         return (ADDRHASH(key));
2325 }
2326 
/*
 * Key extraction for the file index: the key of an rfs4_file_t is its
 * rf_vp pointer.
 */
2327 static void *
2328 file_mkkey(rfs4_entry_t u_entry)
2329 {
2330         rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2331 
2332         return (fp->rf_vp);
2333 }
2334 


2405 
2406         mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2407         cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2408 
2409         fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2410 
2411         rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2412 
2413         mutex_enter(&vp->v_vsd_lock);
2414         VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2415         mutex_exit(&vp->v_vsd_lock);
2416 
2417         return (TRUE);
2418 }
2419 
/*
 * Find the rfs4_file_t associated with a vnode.
 *
 * If *create is TRUE, the lookup goes through rfs4_dbsearch() on
 * rfs4_file_idx with vp as the key and {vp, fh} as the creation argument.
 *
 * Otherwise the file struct is read from the vnode's VSD slot
 * (nfs4_srv_vkey) under v_vsd_lock.  It is returned, with a hold taken
 * via rfs4_dbe_hold(), only if its dbe is not invalid and its reference
 * count is non-zero; in every other case NULL is returned.
 */
2420 rfs4_file_t *
2421 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2422 {
2423         rfs4_file_t *fp;
2424         rfs4_fcreate_arg arg;

2425 
2426         arg.vp = vp;
2427         arg.fh = fh;
2428 
2429         if (*create == TRUE)
2430                 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,

2431                     &arg, RFS4_DBS_VALID);
2432         else {
2433                 mutex_enter(&vp->v_vsd_lock);
2434                 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2435                 if (fp) {
2436                         rfs4_dbe_lock(fp->rf_dbe);
                             /* Ignore entries that are invalid or unreferenced. */
2437                         if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2438                             (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2439                                 rfs4_dbe_unlock(fp->rf_dbe);
2440                                 fp = NULL;
2441                         } else {
2442                                 rfs4_dbe_hold(fp->rf_dbe);
2443                                 rfs4_dbe_unlock(fp->rf_dbe);
2444                         }
2445                 }
2446                 mutex_exit(&vp->v_vsd_lock);
2447         }
2448         return (fp);
2449 }
2450 
2451 /*
2452  * Find a file in the db and once it is located, take the rw lock.
2453  * Need to check the vnode pointer and if it does not exist (it was
2454  * removed between the db location and check) redo the find.  This
2455  * assumes that a file struct that has a NULL vnode pointer is marked
2456  * as 'invalid' and will not be found in the db the second time
2457  * around.
      *
      * A non-NULL return value has rf_file_rwlock held as writer.
      *
      * When *create is FALSE on entry, the file struct is read from the
      * vnode's VSD slot and there is no retry: NULL is returned if the slot
      * is empty, the dbe is invalid or unreferenced, or rf_vp is NULL once
      * the rwlock has been taken.  When *create is TRUE on entry, the
      * rfs4_dbsearch() lookup is repeated, with *create restored to its
      * entry value, for as long as the entry found has a NULL rf_vp.
2458  */
2459 rfs4_file_t *
2460 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2461 {
2462         rfs4_file_t *fp;
2463         rfs4_fcreate_arg arg;
2464         bool_t screate = *create;

2465 
2466         if (screate == FALSE) {
2467                 mutex_enter(&vp->v_vsd_lock);
2468                 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2469                 if (fp) {
2470                         rfs4_dbe_lock(fp->rf_dbe);
2471                         if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2472                             (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2473                                 rfs4_dbe_unlock(fp->rf_dbe);
2474                                 mutex_exit(&vp->v_vsd_lock);
2475                                 fp = NULL;
2476                         } else {
2477                                 rfs4_dbe_hold(fp->rf_dbe);
2478                                 rfs4_dbe_unlock(fp->rf_dbe);
2479                                 mutex_exit(&vp->v_vsd_lock);
                                     /* v_vsd_lock is dropped before taking the rwlock. */
2480                                 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2481                                 if (fp->rf_vp == NULL) {
2482                                         rw_exit(&fp->rf_file_rwlock);
2483                                         rfs4_file_rele(fp);
2484                                         fp = NULL;
2485                                 }
2486                         }
2487                 } else {
2488                         mutex_exit(&vp->v_vsd_lock);
2489                 }
2490         } else {
2491 retry:
2492                 arg.vp = vp;
2493                 arg.fh = fh;
2494 
2495                 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
2496                     &arg, RFS4_DBS_VALID);
2497                 if (fp != NULL) {
2498                         rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2499                         if (fp->rf_vp == NULL) {
2500                                 rw_exit(&fp->rf_file_rwlock);
2501                                 rfs4_file_rele(fp);
                                     /* Restore the caller's create flag before retrying. */
2502                                 *create = screate;
2503                                 goto retry;
2504                         }
2505                 }
2506         }
2507 
2508         return (fp);
2509 }
2510 
2511 static uint32_t
2512 lo_state_hash(void *key)
2513 {
2514         stateid_t *id = key;
2515 
2516         return (id->bits.ident+id->bits.pid);


2631         list_insert_tail(&sp->rs_lostatelist, lsp);
2632         rfs4_dbe_hold(sp->rs_dbe);
2633         rfs4_dbe_unlock(sp->rs_dbe);
2634 
2635         return (TRUE);
2636 }
2637 
/*
 * Release a lock-owner state entry via rfs4_dbe_rele().  When unlock_fp
 * is TRUE, the rf_file_rwlock of the file reached through
 * lsp->rls_state->rs_finfo is dropped first.
 */
2638 void
2639 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2640 {
2641         if (unlock_fp == TRUE)
2642                 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2643         rfs4_dbe_rele(lsp->rls_dbe);
2644 }
2645 
/*
 * Search rfs4_lo_state_idx for an existing lock-owner state by stateid;
 * never creates an entry.  When lock_fp is TRUE and an entry is found,
 * the rf_file_rwlock of its file is taken as reader before returning.
 * Returns the search result.
 */
2646 static rfs4_lo_state_t *
2647 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2648 {
2649         rfs4_lo_state_t *lsp;
2650         bool_t create = FALSE;

2651 
2652         lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_idx, id,
2653             &create, NULL, RFS4_DBS_VALID);
2654         if (lock_fp == TRUE && lsp != NULL)
2655                 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2656 
2657         return (lsp);
2658 }
2659 
2660 
/*
 * Hash an rfs4_lo_state_t key by XOR-ing the ADDRHASH() of its rls_locker
 * and rls_state pointers.
 */
2661 static uint32_t
2662 lo_state_lo_hash(void *key)
2663 {
2664         rfs4_lo_state_t *lsp = key;
2665 
2666         return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2667 }
2668 
/*
 * Compare an entry against a key of the same type: they match when both
 * the rls_locker and the rls_state pointers are equal.
 */
2669 static bool_t
2670 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2671 {
2672         rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2673         rfs4_lo_state_t *keyp = key;
2674 
2675         return (keyp->rls_locker == lsp->rls_locker &&
2676             keyp->rls_state == lsp->rls_state);
2677 }
2678 
/*
 * Key extraction: the entry itself is returned as its key.
 */
2679 static void *
2680 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2681 {
2682         return (u_entry);
2683 }
2684 
/*
 * Search rfs4_lo_state_owner_idx for the lock-owner state belonging to
 * the given lockowner and open state.
 *
 * A stack rfs4_lo_state_t with only rls_locker and rls_state filled in
 * serves as both the search key and the creation argument; "create" is
 * passed through to rfs4_dbsearch().  Returns the search result.
 */
2685 rfs4_lo_state_t *
2686 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2687     bool_t *create)
2688 {
2689         rfs4_lo_state_t *lsp;
2690         rfs4_lo_state_t arg;

2691 
2692         arg.rls_locker = lo;
2693         arg.rls_state = sp;
2694 
2695         lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_owner_idx, &arg,
2696             create, &arg, RFS4_DBS_VALID);
2697 
2698         return (lsp);
2699 }
2700 
/*
 * Build and return a stateid_t for the given entry id.
 *
 * boottime is set to rfs4_start_time and ident to eid; chgseq, type and
 * pid are zeroed.  clnodeid is clconf_get_nodeid() when booted as a
 * cluster node and 0 otherwise.
 */
2701 static stateid_t
2702 get_stateid(id_t eid)
2703 {
2704         stateid_t id;

2705 
2706         id.bits.boottime = rfs4_start_time;


2707         id.bits.ident = eid;
2708         id.bits.chgseq = 0;
2709         id.bits.type = 0;
2710         id.bits.pid = 0;
2711 
2712         /*
2713          * If we are booted as a cluster node, embed our nodeid.
2714          * We've already done sanity checks in rfs4_client_create() so no
2715          * need to repeat them here.
2716          */
2717         id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2718             clconf_get_nodeid() : 0;
2719 
2720         return (id);
2721 }
2722 
2723 /*
2724  * For use only when booted as a cluster node.
2725  * Returns TRUE if the embedded nodeid indicates that this stateid was
2726  * generated on another node.


2942 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
2943 {
2944         rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2945 
2946         /* return delegation if necessary */
2947         rfs4_return_deleg(dsp, FALSE);
2948 
2949         /* We're done with the file */
2950         rfs4_file_rele(dsp->rds_finfo);
2951         dsp->rds_finfo = NULL;
2952 
2953         /* And now with the client */
2954         rfs4_client_rele(dsp->rds_client);
2955         dsp->rds_client = NULL;
2956 }
2957 
/*
 * Search rfs4_deleg_idx for the delegation state matching the client and
 * file of the given open state.
 *
 * A stack rfs4_deleg_state_t with only rds_client (from
 * sp->rs_owner->ro_client) and rds_finfo (from sp->rs_finfo) filled in
 * serves as both the search key and the creation argument; "create" is
 * passed through to rfs4_dbsearch().  Returns the search result.
 */
2958 rfs4_deleg_state_t *
2959 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
2960 {
2961         rfs4_deleg_state_t ds, *dsp;

2962 
2963         ds.rds_client = sp->rs_owner->ro_client;
2964         ds.rds_finfo = sp->rs_finfo;
2965 
2966         dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_idx, &ds,
2967             create, &ds, RFS4_DBS_VALID);
2968 
2969         return (dsp);
2970 }
2971 
/*
 * Search rfs4_deleg_state_idx for an existing delegation state by
 * stateid; never creates an entry.  Returns the search result.
 */
2972 rfs4_deleg_state_t *
2973 rfs4_finddelegstate(stateid_t *id)
2974 {
2975         rfs4_deleg_state_t *dsp;
2976         bool_t create = FALSE;

2977 
2978         dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_state_idx, id,
2979             &create, NULL, RFS4_DBS_VALID);
2980 
2981         return (dsp);
2982 }
2983 
/*
 * Release the delegation state's database entry by calling
 * rfs4_dbe_rele() on dsp->rds_dbe.
 */
2984 void
2985 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
2986 {
2987         rfs4_dbe_rele(dsp->rds_dbe);
2988 }
2989 
2990 void
2991 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
2992 {
2993 
2994         rfs4_dbe_lock(lsp->rls_dbe);
2995 
2996         /*
2997          * If we are skipping sequence id checking, this means that
2998          * this is the first lock request and therefore the sequence
2999          * id does not need to be updated.  This only happens on the


3078         if (sp->rs_closed == TRUE)
3079                 return (FALSE);
3080 
3081         return (fp == sp->rs_finfo);
3082 }
3083 
/*
 * Key extraction: the key of an rfs4_state_t is its rs_finfo pointer.
 */
3084 static void *
3085 state_file_mkkey(rfs4_entry_t u_entry)
3086 {
3087         rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3088 
3089         return (sp->rs_finfo);
3090 }
3091 
/*
 * Search rfs4_state_owner_file_idx for the open state belonging to the
 * given openowner and file.
 *
 * A stack rfs4_state_t with only rs_owner and rs_finfo filled in serves
 * as both the search key and the creation argument; "create" is passed
 * through to rfs4_dbsearch().  Returns the search result.
 */
3092 rfs4_state_t *
3093 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3094         bool_t *create)
3095 {
3096         rfs4_state_t *sp;
3097         rfs4_state_t key;

3098 
3099         key.rs_owner = oo;
3100         key.rs_finfo = fp;
3101 
3102         sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_owner_file_idx, &key,
3103             create, &key, RFS4_DBS_VALID);
3104 
3105         return (sp);
3106 }
3107 
3108 /* This returns ANY state struct that refers to this file */
     /*
      * The search uses rfs4_state_file_idx with fp as the key and never
      * creates an entry (create is FALSE).
      */
3109 static rfs4_state_t *
3110 rfs4_findstate_by_file(rfs4_file_t *fp)
3111 {
3112         bool_t create = FALSE;

3113 
3114         return ((rfs4_state_t *)rfs4_dbsearch(rfs4_state_file_idx, fp,
3115             &create, fp, RFS4_DBS_VALID));
3116 }
3117 
/*
 * Expiry check for an open state entry.  Returns TRUE when:
 *  - the database entry is invalid, or
 *  - the state is closed and more than rfs4_lease_time seconds have
 *    passed since the time reported by rfs4_dbe_get_timerele(), or
 *  - more than rfs4_lease_time seconds have passed since the owning
 *    client's rc_last_access.
 * Otherwise returns FALSE.
 */
3118 static bool_t
3119 rfs4_state_expiry(rfs4_entry_t u_entry)
3120 {
3121         rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3122 
3123         if (rfs4_dbe_is_invalid(sp->rs_dbe))
3124                 return (TRUE);
3125 
3126         if (sp->rs_closed == TRUE &&
3127             ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3128             > rfs4_lease_time))
3129                 return (TRUE);
3130 
             /* Fall back to the lease of the client that owns this state. */
3131         return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3132             > rfs4_lease_time));
3133 }
3134 


3145         sp->rs_stateid.bits.type = OPENID;
3146         sp->rs_owner = oo;
3147         sp->rs_finfo = fp;
3148 
3149         list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3150             offsetof(rfs4_lo_state_t, rls_node));
3151 
3152         /* Insert state on per open owner's list */
3153         rfs4_dbe_lock(oo->ro_dbe);
3154         list_insert_tail(&oo->ro_statelist, sp);
3155         rfs4_dbe_unlock(oo->ro_dbe);
3156 
3157         return (TRUE);
3158 }
3159 
/*
 * Search rfs4_state_idx for an existing open state by stateid; never
 * creates an entry.  find_invalid is handed to rfs4_dbsearch() as the
 * search type.  When lock_fp is TRUE and an entry is found, the
 * rf_file_rwlock of its file is taken as reader before returning.
 * Returns the search result.
 */
3160 static rfs4_state_t *
3161 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3162 {
3163         rfs4_state_t *sp;
3164         bool_t create = FALSE;

3165 
3166         sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_idx, id,
3167             &create, NULL, find_invalid);
3168         if (lock_fp == TRUE && sp != NULL)
3169                 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3170 
3171         return (sp);
3172 }
3173 
3174 void
3175 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3176     cred_t *cr)
3177 {
3178         /* Remove the associated lo_state owners */
3179         if (!lock_held)
3180                 rfs4_dbe_lock(sp->rs_dbe);
3181 
3182         /*
3183          * If refcnt == 0, the dbe is about to be destroyed.
3184          * lock state will be released by the reaper thread.
3185          */
3186 


3214 }
3215 
/*
 * Close out a client: invalidate its database entry under the dbe lock,
 * call rfs4_client_state_remove() on it, and finally release the client
 * with rfs4_client_rele().
 */
3216 void
3217 rfs4_client_close(rfs4_client_t *cp)
3218 {
3219         /* Mark client as going away. */
3220         rfs4_dbe_lock(cp->rc_dbe);
3221         rfs4_dbe_invalidate(cp->rc_dbe);
3222         rfs4_dbe_unlock(cp->rc_dbe);
3223 
3224         rfs4_client_state_remove(cp);
3225 
3226         /* Release the client */
3227         rfs4_client_rele(cp);
3228 }
3229 
/*
 * Choose the error to report for a clientid.  This function never
 * returns a success status.
 *
 * Returns NFS4ERR_STALE_CLIENTID when booted as a cluster node and
 * foreign_clientid() flags the id.  Returns NFS4ERR_EXPIRED when
 * setclid_confirm is zero and the start_time embedded in the clientid
 * equals rfs4_start_time.  Returns NFS4ERR_STALE_CLIENTID in every other
 * case.
 */
3230 nfsstat4
3231 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3232 {
3233         cid *cidp = (cid *) cp;

3234 


3235         /*
3236          * If we are booted as a cluster node, check the embedded nodeid.
3237          * If it indicates that this clientid was generated on another node,
3238          * inform the client accordingly.
3239          */
3240         if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3241                 return (NFS4ERR_STALE_CLIENTID);
3242 
3243         /*
3244          * If the server start time matches the time provided
3245          * by the client (via the clientid) and this is NOT a
3246          * setclientid_confirm then return EXPIRED.
3247          */
3248         if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time)

3249                 return (NFS4ERR_EXPIRED);
3250 
3251         return (NFS4ERR_STALE_CLIENTID);
3252 }
3253 
3254 /*
3255  * This is used when a stateid has not been found amongst the
3256  * current server's state.  Check the stateid to see if it
3257  * was from this server instantiation or not.
      *
      * Returns, in order of evaluation:
      *  - NFS4ERR_STALE_STATEID if booted as a cluster node and
      *    foreign_stateid() flags the id;
      *  - NFS4ERR_BAD_STATEID if "type" differs from the type in the id;
      *  - NFS4ERR_STALE_STATEID if the id's boottime is not rfs4_start_time;
      *  - NFS4ERR_BAD_STATEID for a DELEGID type, NFS4ERR_EXPIRED otherwise.
3258  */
3259 static nfsstat4
3260 what_stateid_error(stateid_t *id, stateid_type_t type)
3261 {




3262         /* If we are booted as a cluster node, was stateid locally generated? */
3263         if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3264                 return (NFS4ERR_STALE_STATEID);
3265 
3266         /* If types don't match then no use checking further */
3267         if (type != id->bits.type)
3268                 return (NFS4ERR_BAD_STATEID);
3269 
3270         /* From a different server instantiation, return STALE */
3271         if (id->bits.boottime != rfs4_start_time)
3272                 return (NFS4ERR_STALE_STATEID);
3273 
3274         /*
3275          * From this server but the state is most likely beyond lease
3276          * timeout: return NFS4ERR_EXPIRED.  However, there is the
3277          * case of a delegation stateid.  For delegations, there is a
3278          * case where the state can be removed without the client's
3279          * knowledge/consent: revocation.  In the case of delegation
3280          * revocation, the delegation state will be removed and will
3281          * not be found.  If the client does something like a
3282          * DELEGRETURN or even a READ/WRITE with a delegation stateid
3283          * that has been revoked, the server should return BAD_STATEID
3284          * instead of the more common EXPIRED error.
3285          */
3286         if (id->bits.boottime == rfs4_start_time) {
3287                 if (type == DELEGID)
3288                         return (NFS4ERR_BAD_STATEID);
3289                 else
3290                         return (NFS4ERR_EXPIRED);
3291         }
3292 
             /*
              * NOTE(review): the two boottime tests above are complementary,
              * so this final return looks unreachable unless the compared
              * values change between the tests -- confirm.
              */
3293         return (NFS4ERR_BAD_STATEID);
3294 }
3295 
3296 /*
3297  * Used later on to find the various state structs.  When called from
3298  * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3299  * taken (it is not needed) and helps on the read/write path with
3300  * respect to performance.
3301  */
3302 static nfsstat4
3303 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3304     rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3305 {
3306         stateid_t *id = (stateid_t *)stateid;


3768 
3769 /*
3770  * This is a special function in that for the file struct provided the
3771  * server wants to remove/close all current state associated with the
3772  * file.  The prime use of this would be with OP_REMOVE to force the
3773  * release of state and particularly of file locks.
3774  *
3775  * There is an assumption that there is no delegations outstanding on
3776  * this file at this point.  The caller should have waited for those
3777  * to be returned or revoked.
3778  */
3779 void
3780 rfs4_close_all_state(rfs4_file_t *fp)
3781 {
3782         rfs4_state_t *sp;
3783 
3784         rfs4_dbe_lock(fp->rf_dbe);
3785 
3786 #ifdef DEBUG
3787         /* only applies when server is handing out delegations */
3788         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE)
3789                 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3790 #endif
3791 
3792         /* No delegations for this file */
3793         ASSERT(list_is_empty(&fp->rf_delegstatelist));
3794 
3795         /* Make sure that it can not be found */
3796         rfs4_dbe_invalidate(fp->rf_dbe);
3797 
3798         if (fp->rf_vp == NULL) {
3799                 rfs4_dbe_unlock(fp->rf_dbe);
3800                 return;
3801         }
3802         rfs4_dbe_unlock(fp->rf_dbe);
3803 
3804         /*
3805          * Hold as writer to prevent other server threads from
3806          * processing requests related to the file while all state is
3807          * being removed.
3808          */


3974                             OPEN_DELEGATE_WRITE) {
3975                                 (void) fem_uninstall(vp, deleg_wrops,
3976                                     (void *)fp);
3977                                 vn_open_downgrade(vp, FREAD|FWRITE);
3978                         }
3979                         mutex_enter(&vp->v_vsd_lock);
3980                         (void) vsd_set(vp, nfs4_srv_vkey, NULL);
3981                         mutex_exit(&vp->v_vsd_lock);
3982                         VN_RELE(vp);
3983                         fp->rf_vp = NULL;
3984                 }
3985                 rfs4_dbe_invalidate(fp->rf_dbe);
3986         }
3987 }
3988 
3989 /*
3990  * Given a directory that is being unexported, cleanup/release all
3991  * state in the server that refers to objects residing underneath this
3992  * particular export.  The ordering of the release is important.
3993  * Lock_owner, then state and then file.




3994  */
3995 void
3996 rfs4_clean_state_exi(struct exportinfo *exi)
3997 {
3998         mutex_enter(&rfs4_state_lock);

3999 
4000         if (rfs4_server_state == NULL) {
4001                 mutex_exit(&rfs4_state_lock);






4002                 return;
4003         }
4004 
4005         rfs4_dbe_walk(rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4006         rfs4_dbe_walk(rfs4_state_tab, rfs4_state_walk_callout, exi);
4007         rfs4_dbe_walk(rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4008         rfs4_dbe_walk(rfs4_file_tab, rfs4_file_walk_callout, exi);


4009 
4010         mutex_exit(&rfs4_state_lock);
4011 }
   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.

  24  */
  25 
  26 /*
  27  * Copyright 2018 Nexenta Systems, Inc.
  28  * Copyright 2019 Nexenta by DDN, Inc.
  29  */
  30 
  31 #include <sys/systm.h>
  32 #include <sys/kmem.h>
  33 #include <sys/cmn_err.h>
  34 #include <sys/atomic.h>
  35 #include <sys/clconf.h>
  36 #include <sys/cladm.h>
  37 #include <sys/flock.h>
  38 #include <nfs/export.h>
  39 #include <nfs/nfs.h>
  40 #include <nfs/nfs4.h>
  41 #include <nfs/nfssys.h>
  42 #include <nfs/lm.h>
  43 #include <sys/pathname.h>
  44 #include <sys/sdt.h>
  45 #include <sys/nvpair.h>
  46 
  47 extern u_longlong_t nfs4_srv_caller_id;
  48 

  49 extern uint_t nfs4_srv_vkey;
  50 
  51 stateid4 special0 = {
  52         0,
  53         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
  54 };
  55 
  56 stateid4 special1 = {
  57         0xffffffff,
  58         {
  59                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  60                 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
  61                 (char)0xff, (char)0xff, (char)0xff, (char)0xff
  62         }
  63 };
  64 
  65 
  66 #define ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
  67                         stateid4_cmp(id, &special1))
  68 
  69 /* For embedding the cluster nodeid into our clientid */
  70 #define CLUSTER_NODEID_SHIFT    24
  71 #define CLUSTER_MAX_NODEID      255
  72 
  73 #ifdef DEBUG
  74 int rfs4_debug;
  75 #endif
  76 
  77 static uint32_t rfs4_database_debug = 0x00;
  78 
  79 /* CSTYLED */
  80 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
  81 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
  82 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
  83 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
  84 
  85 /*
  86  * Couple of simple init/destroy functions for a general waiter
  87  */
  88 void
  89 rfs4_sw_init(rfs4_state_wait_t *swp)
  90 {
  91         mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
  92         cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
  93         swp->sw_active = FALSE;
  94         swp->sw_wait_count = 0;
  95 }
  96 
  97 void
  98 rfs4_sw_destroy(rfs4_state_wait_t *swp)
  99 {
 100         mutex_destroy(swp->sw_cv_lock);


 109                 swp->sw_wait_count++;
 110                 cv_wait(swp->sw_cv, swp->sw_cv_lock);
 111                 swp->sw_wait_count--;
 112         }
 113         ASSERT(swp->sw_active == FALSE);
 114         swp->sw_active = TRUE;
 115         mutex_exit(swp->sw_cv_lock);
 116 }
 117 
 118 void
 119 rfs4_sw_exit(rfs4_state_wait_t *swp)
 120 {
 121         mutex_enter(swp->sw_cv_lock);
 122         ASSERT(swp->sw_active == TRUE);
 123         swp->sw_active = FALSE;
 124         if (swp->sw_wait_count != 0)
 125                 cv_broadcast(swp->sw_cv);
 126         mutex_exit(swp->sw_cv_lock);
 127 }
 128 





 129 static void
 130 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
 131 {
 132         lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
 133         lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
 134 
 135         if (sres->status == NFS4ERR_DENIED) {
 136                 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
 137                 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
 138         }
 139 }
 140 
 141 /*
 142  * CPR callback id -- not related to v4 callbacks
 143  */
 144 static callb_id_t cpr_id = 0;
 145 
 146 static void
 147 deep_lock_free(LOCK4res *res)
 148 {
 149         lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
 150 
 151         if (res->status == NFS4ERR_DENIED)
 152                 kmem_free(lo->owner_val, lo->owner_len);
 153 }
 154 
 155 static void
 156 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
 157 {
 158         nfsace4 *sacep, *dacep;
 159 
 160         if (sres->status != NFS4_OK) {
 161                 return;
 162         }
 163 
 164         dres->attrset = sres->attrset;
 165 


 261 /*
 262  * This code is somewhat prototypical for now. Its purpose currently is to
 263  * implement the interfaces sufficiently to finish the higher protocol
 264  * elements. This will be replaced by dynamically resizable tables
 265  * backed by kmem_cache allocator. However synchronization is handled
 266  * correctly (I hope) and will not change by much.  The mutexes for
 267  * the hash buckets that can be used to create new instances of data
 268  * structures  might be good candidates to evolve into reader writer
 269  * locks. If it has to do a creation, it would be holding the
 270  * mutex across a kmem_alloc with KM_SLEEP specified.
 271  */
 272 
 273 #ifdef DEBUG
 274 #define TABSIZE 17
 275 #else
 276 #define TABSIZE 2047
 277 #endif
 278 
 279 #define ADDRHASH(key) ((unsigned long)(key) >> 3)
 280 



































 281 #define MAXTABSZ 1024*1024
 282 
 283 /* The values below are rfs4_lease_time units */
 284 
 285 #ifdef DEBUG
 286 #define CLIENT_CACHE_TIME 1
 287 #define OPENOWNER_CACHE_TIME 1
 288 #define STATE_CACHE_TIME 1
 289 #define LO_STATE_CACHE_TIME 1
 290 #define LOCKOWNER_CACHE_TIME 1
 291 #define FILE_CACHE_TIME 3
 292 #define DELEG_STATE_CACHE_TIME 1
 293 #else
 294 #define CLIENT_CACHE_TIME 10
 295 #define OPENOWNER_CACHE_TIME 5
 296 #define STATE_CACHE_TIME 1
 297 #define LO_STATE_CACHE_TIME 1
 298 #define LOCKOWNER_CACHE_TIME 3
 299 #define FILE_CACHE_TIME 40
 300 #define DELEG_STATE_CACHE_TIME 1
 301 #endif
 302 
 303 /*
 304  * NFSv4 server state databases
 305  *
 306  * Initialized when the module is loaded and used by NFSv4 state tables.
 307  * These kmem_cache databases are global, the tables that make use of these
 308  * are per zone.
 309  */
 310 kmem_cache_t *rfs4_client_mem_cache;
 311 kmem_cache_t *rfs4_clntIP_mem_cache;
 312 kmem_cache_t *rfs4_openown_mem_cache;
 313 kmem_cache_t *rfs4_openstID_mem_cache;
 314 kmem_cache_t *rfs4_lockstID_mem_cache;
 315 kmem_cache_t *rfs4_lockown_mem_cache;
 316 kmem_cache_t *rfs4_file_mem_cache;
 317 kmem_cache_t *rfs4_delegstID_mem_cache;
 318 
 319 /*
 320  * NFSv4 state table functions
 321  */






 322 static bool_t rfs4_client_create(rfs4_entry_t, void *);
 323 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
 324 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
 325 static void rfs4_client_destroy(rfs4_entry_t);
 326 static bool_t rfs4_client_expiry(rfs4_entry_t);
 327 static uint32_t clientid_hash(void *);
 328 static bool_t clientid_compare(rfs4_entry_t, void *);
 329 static void *clientid_mkkey(rfs4_entry_t);
 330 static uint32_t nfsclnt_hash(void *);
 331 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
 332 static void *nfsclnt_mkkey(rfs4_entry_t);
 333 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
 334 static void rfs4_clntip_destroy(rfs4_entry_t);
 335 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
 336 static uint32_t clntip_hash(void *);
 337 static bool_t clntip_compare(rfs4_entry_t, void *);
 338 static void *clntip_mkkey(rfs4_entry_t);
 339 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
 340 static void rfs4_openowner_destroy(rfs4_entry_t);
 341 static bool_t rfs4_openowner_expiry(rfs4_entry_t);


 667                                         cl_ss->ss_pn = rfs4_ss_movestate(
 668                                             statedir, destdir, dep->d_name);
 669                                 } else {
 670                                         cl_ss->ss_pn = ss_pn;
 671                                 }
 672                                 insque(cl_ss, oldstate);
 673                         } else {
 674                                 rfs4_ss_pnfree(ss_pn);
 675                         }
 676                 }
 677         }
 678 
 679 out:
 680         (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
 681         VN_RELE(dvp);
 682         if (dirt)
 683                 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
 684 }
 685 
 686 static void
 687 rfs4_ss_init(nfs4_srv_t *nsrv4)
 688 {
 689         int npaths = 1;
 690         char *default_dss_path = NFS4_DSS_VAR_DIR;
 691 
 692         /* read the default stable storage state */
 693         rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
 694 
 695         rfs4_ss_enabled = 1;
 696 }
 697 
 698 static void
 699 rfs4_ss_fini(nfs4_srv_t *nsrv4)
 700 {
 701         rfs4_servinst_t *sip;
 702 
 703         mutex_enter(&nsrv4->servinst_lock);
 704         sip = nsrv4->nfs4_cur_servinst;
 705         while (sip != NULL) {
 706                 rfs4_dss_clear_oldstate(sip);
 707                 sip = sip->next;
 708         }
 709         mutex_exit(&nsrv4->servinst_lock);
 710 }
 711 
 712 /*
 713  * Remove all oldstate files referenced by this servinst.
 714  */
 715 static void
 716 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
 717 {
 718         rfs4_oldstate_t *os_head, *osp;
 719 
 720         rw_enter(&sip->oldstate_lock, RW_WRITER);
 721         os_head = sip->oldstate;
 722 
 723         if (os_head == NULL) {
 724                 rw_exit(&sip->oldstate_lock);
 725                 return;
 726         }
 727 
 728         /* skip dummy entry */
 729         osp = os_head->next;


 733 
 734                 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
 735 
 736                 if (osp->cl_id4.id_val)
 737                         kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
 738                 rfs4_ss_pnfree(osp->ss_pn);
 739 
 740                 os_next = osp->next;
 741                 remque(osp);
 742                 kmem_free(osp, sizeof (rfs4_oldstate_t));
 743                 osp = os_next;
 744         }
 745 
 746         rw_exit(&sip->oldstate_lock);
 747 }
 748 
 749 /*
 750  * Form the state and oldstate paths, and read in the stable storage files.
 751  */
 752 void
 753 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
 754 {
 755         int i;
 756         char *state, *oldstate;
 757 
 758         state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 759         oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 760 
 761         for (i = 0; i < npaths; i++) {
 762                 char *path = paths[i];
 763 
 764                 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
 765                 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
 766 
 767                 /*
 768                  * Populate the current server instance's oldstate list.
 769                  *
 770                  * 1. Read stable storage data from old state directory,
 771                  *    leaving its contents alone.
 772                  *
 773                  * 2. Read stable storage data from state directory,
 774                  *    and move the latter's contents to old state
 775                  *    directory.
 776                  */
 777                 /* CSTYLED */
 778                 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
 779                 /* CSTYLED */
 780                 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
 781         }
 782 
 783         kmem_free(state, MAXPATHLEN);
 784         kmem_free(oldstate, MAXPATHLEN);
 785 }
 786 
 787 
 788 /*
 789  * Check if we are still in grace and if the client can be
 790  * granted permission to perform reclaims.
 791  */
 792 void
 793 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
 794 {
 795         rfs4_servinst_t *sip;
 796 
 797         /*
 798          * It should be sufficient to check the oldstate data for just
 799          * this client's instance. However, since our per-instance
 800          * client grouping is solely temporal, HA-NFSv4 RG failover
 801          * might result in clients of the same RG being partitioned into
 802          * separate instances.
 803          *
 804          * Until the client grouping is improved, we must check the
 805          * oldstate data for all instances with an active grace period.
 806          *
 807          * This also serves as the mechanism to remove stale oldstate data.
 808          * The first time we check an instance after its grace period has
 809          * expired, the oldstate data should be cleared.
 810          *
 811          * Start at the current instance, and walk the list backwards
 812          * to the first.
 813          */
 814         mutex_enter(&nsrv4->servinst_lock);
 815         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 816                 rfs4_ss_chkclid_sip(cp, sip);
 817 
 818                 /* if the above check found this client, we're done */
 819                 if (cp->rc_can_reclaim)
 820                         break;
 821         }
 822         mutex_exit(&nsrv4->servinst_lock);
 823 }
 824 
 825 static void
 826 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
 827 {
 828         rfs4_oldstate_t *osp, *os_head;
 829 
 830         /* short circuit everything if this server instance has no oldstate */
 831         rw_enter(&sip->oldstate_lock, RW_READER);
 832         os_head = sip->oldstate;
 833         rw_exit(&sip->oldstate_lock);
 834         if (os_head == NULL)
 835                 return;
 836 
 837         /*
 838          * If this server instance is no longer in a grace period then
 839          * the client won't be able to reclaim. No further need for this
 840          * instance's oldstate data, so it can be cleared.
 841          */
 842         if (!rfs4_servinst_in_grace(sip))


 852         while (osp != os_head) {
 853                 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
 854                         if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
 855                             osp->cl_id4.id_len) == 0) {
 856                                 cp->rc_can_reclaim = 1;
 857                                 break;
 858                         }
 859                 }
 860                 osp = osp->next;
 861         }
 862 
 863         rw_exit(&sip->oldstate_lock);
 864 }
 865 
 866 /*
 867  * Place client information into stable storage: 1/3.
 868  * First, generate the leaf filename, from the client's IP address and
 869  * the server-generated short-hand clientid.
 870  */
 871 void
 872 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
 873 {
 874         const char *kinet_ntop6(uchar_t *, char *, size_t);
 875         char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
 876         struct sockaddr *ca;
 877         uchar_t *b;
 878 
 879         if (rfs4_ss_enabled == 0) {
 880                 return;
 881         }
 882 
 883         buf[0] = 0;
 884 
 885         ca = (struct sockaddr *)&cp->rc_addr;
 886 
 887         /*
 888          * Convert the caller's IP address to a dotted string
 889          */
 890         if (ca->sa_family == AF_INET) {
 891                 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
 892                 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
 893                     b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
 894         } else if (ca->sa_family == AF_INET6) {
 895                 struct sockaddr_in6 *sin6;
 896 
 897                 sin6 = (struct sockaddr_in6 *)ca;
 898                 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
 899                     buf, INET6_ADDRSTRLEN);
 900         }
 901 
 902         (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
 903             (longlong_t)cp->rc_clientid);
 904         rfs4_ss_clid_write(nsrv4, cp, leaf);
 905 }
 906 
 907 /*
 908  * Place client information into stable storage: 2/3.
 909  * DSS: distributed stable storage: the file may need to be written to
 910  * multiple directories.
 911  */
 912 static void
 913 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
 914 {
 915         rfs4_servinst_t *sip;
 916 
 917         /*
 918          * It should be sufficient to write the leaf file to (all) DSS paths
 919          * associated with just this client's instance. However, since our
 920          * per-instance client grouping is solely temporal, HA-NFSv4 RG
 921          * failover might result in us losing DSS data.
 922          *
 923          * Until the client grouping is improved, we must write the DSS data
 924          * to all instances' paths. Start at the current instance, and
 925          * walk the list backwards to the first.
 926          */
 927         mutex_enter(&nsrv4->servinst_lock);
 928         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
 929                 int i, npaths = sip->dss_npaths;
 930 
 931                 /* write the leaf file to all DSS paths */
 932                 for (i = 0; i < npaths; i++) {
 933                         rfs4_dss_path_t *dss_path = sip->dss_paths[i];
 934 
 935                         /* HA-NFSv4 path might have been failed-away from us */
 936                         if (dss_path == NULL)
 937                                 continue;
 938 
 939                         rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
 940                 }
 941         }
 942         mutex_exit(&nsrv4->servinst_lock);
 943 }
 944 
 945 /*
 946  * Place client information into stable storage: 3/3.
 947  * Write the stable storage data to the requested file.
 948  */
 949 static void
 950 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
 951 {
 952         int ioflag;
 953         int file_vers = NFS4_SS_VERSION;
 954         size_t dirlen;
 955         struct uio uio;
 956         struct iovec iov[4];
 957         char *dir;
 958         rfs4_ss_pn_t *ss_pn;
 959         vnode_t *vp;
 960         nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
 961 
 962         /* allow 2 extra bytes for '/' & NUL */


1115                  * for forced expiration
1116                  */
1117                 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1118                         cp->rc_forced_expire = 1;
1119                 }
1120                 break;
1121 
1122         default:
1123                 /* force this assert to fail */
1124                 ASSERT(clr->addr_type != clr->addr_type);
1125         }
1126 }
1127 
1128 /*
1129  * This is called from nfssys() in order to clear server state
1130  * for the specified client IP Address.
1131  */
1132 void
1133 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1134 {
1135         nfs4_srv_t *nsrv4;
1136         nsrv4 = nfs4_get_srv();
1137         (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1138 }
1139 
1140 /*
1141  * Used to initialize the NFSv4 server's state or database.  All of
1142  * the tables are created and timers are set.

1143  */
1144 void
1145 rfs4_state_g_init()
1146 {

1147         extern boolean_t rfs4_cpr_callb(void *, int);
1148         /*
1149          * Add a CPR callback so that we can update client
1150          * access times to extend the lease after a suspend
1151          * and resume (using the same class as rpcmod/connmgr)
1152          */
1153         cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1154 
1155         /*
1156          * NFSv4 server state databases
1157          *
1158          * Initialized when the module is loaded and used by NFSv4 state
1159          * tables.  These kmem_cache free pools are used globally, the NFSv4
1160          * state tables which make use of these kmem_cache free pools are per
1161          * zone.
1162          *
1163          * initialize the global kmem_cache free pools which will be used by
1164          * the NFSv4 state tables.
1165          */
1166         /* CSTYLED */
1167         rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
1168         /* CSTYLED */
1169         rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
1170         /* CSTYLED */
1171         rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
1172         /* CSTYLED */
1173         rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
1174         /* CSTYLED */
1175         rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
1176         /* CSTYLED */
1177         rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
1178         /* CSTYLED */
1179         rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
1180         /* CSTYLED */
1181         rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1182 
1183         rfs4_client_clrst = rfs4_clear_client_state;
1184 }
1185 
1186 
1187 /*
1188  * Used at server shutdown to cleanup all of the NFSv4 server's structures
1189  * and other state.
1190  */
1191 void
1192 rfs4_state_g_fini()
1193 {
1194         int i;
1195         /*
1196          * Cleanup the CPR callback.

1197          */
1198         if (cpr_id)
1199                 (void) callb_delete(cpr_id);
1200 
1201         rfs4_client_clrst = NULL;
1202 
1203         /* free the NFSv4 state databases */
1204         for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1205                 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1206                 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1207         }
1208 
1209         rfs4_client_mem_cache = NULL;
1210         rfs4_clntIP_mem_cache = NULL;
1211         rfs4_openown_mem_cache = NULL;
1212         rfs4_openstID_mem_cache = NULL;
1213         rfs4_lockstID_mem_cache = NULL;
1214         rfs4_lockown_mem_cache = NULL;
1215         rfs4_file_mem_cache = NULL;
1216         rfs4_delegstID_mem_cache = NULL;
1217 
1218         /* DSS: distributed stable storage */
1219         nvlist_free(rfs4_dss_oldpaths);
1220         nvlist_free(rfs4_dss_paths);
1221         rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1222 }
1223 
1224 /*
1225  * Used to initialize the per zone NFSv4 server's state
1226  */
1227 void
1228 rfs4_state_zone_init(nfs4_srv_t *nsrv4)
1229 {
1230         time_t start_time;
1231         int start_grace;
1232         char *dss_path = NFS4_DSS_VAR_DIR;
1233 
1234         /* DSS: distributed stable storage: initialise served paths list */
1235         nsrv4->dss_pathlist = NULL;
1236 
1237         /*
1238          * Set the boot time.  If the server
1239          * has been restarted quickly and has had the opportunity to
1240          * service clients, then the start_time needs to be bumped
1241          * regardless.  A small window but it exists...
1242          */
1243         start_time = gethrestime_sec();
1244         if (nsrv4->rfs4_start_time < start_time)
1245                 nsrv4->rfs4_start_time = start_time;
1246         else
1247                 nsrv4->rfs4_start_time++;
1248 



1249         /*
1250          * Create the first server instance, or a new one if the server has
1251          * been restarted; see above comments on rfs4_start_time. Don't
1252          * start its grace period; that will be done later, to maximise the
1253          * clients' recovery window.
1254          */
1255         start_grace = 0;
1256         if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
1257                 int i;
1258                 char **dss_allpaths = NULL;
1259                 dss_allpaths = kmem_alloc(sizeof (char *) *
1260                     (rfs4_dss_numnewpaths + 1), KM_SLEEP);
1261                 /*
1262                  * Add the default path into the list of paths for saving
1263                  * state information.
1264                  */
1265                 dss_allpaths[0] = dss_path;
1266                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
1267                         dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
1268                 }
1269                 rfs4_servinst_create(nsrv4, start_grace,
1270                     (rfs4_dss_numnewpaths + 1), dss_allpaths);
1271                 kmem_free(dss_allpaths,
1272                     (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
1273         } else {
1274                 rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
1275         }
1276 
1277         /* reset the "first NFSv4 request" status */
1278         nsrv4->seen_first_compound = 0;
1279 
1280         mutex_enter(&nsrv4->state_lock);
1281 
1282         /*
1283          * If the server state database has already been initialized,
1284          * skip it

1285          */
1286         if (nsrv4->nfs4_server_state != NULL) {
1287                 mutex_exit(&nsrv4->state_lock);
1288                 return;
1289         }
1290 
1291         rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
1292 
1293         /* set the various cache timers for table creation */
1294         if (nsrv4->rfs4_client_cache_time == 0)
1295                 nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
1296         if (nsrv4->rfs4_openowner_cache_time == 0)
1297                 nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1298         if (nsrv4->rfs4_state_cache_time == 0)
1299                 nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
1300         if (nsrv4->rfs4_lo_state_cache_time == 0)
1301                 nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1302         if (nsrv4->rfs4_lockowner_cache_time == 0)
1303                 nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1304         if (nsrv4->rfs4_file_cache_time == 0)
1305                 nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
1306         if (nsrv4->rfs4_deleg_state_cache_time == 0)
1307                 nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1308 
1309         /* Create the overall database to hold all server state */
1310         nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1311 
1312         /* Now create the individual tables */
1313         nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
1314         nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1315             "Client",
1316             nsrv4->rfs4_client_cache_time,
1317             2,
1318             rfs4_client_create,
1319             rfs4_client_destroy,
1320             rfs4_client_expiry,
1321             sizeof (rfs4_client_t),
1322             TABSIZE,
1323             MAXTABSZ/8, 100);
1324         nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1325             "nfs_client_id4", nfsclnt_hash,
1326             nfsclnt_compare, nfsclnt_mkkey,
1327             TRUE);
1328         nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1329             "client_id", clientid_hash,
1330             clientid_compare, clientid_mkkey,
1331             FALSE);
1332 
1333         nsrv4->rfs4_clntip_cache_time = 86400 * 365; /* about a year */
1334         nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1335             "ClntIP",
1336             nsrv4->rfs4_clntip_cache_time,
1337             1,
1338             rfs4_clntip_create,
1339             rfs4_clntip_destroy,
1340             rfs4_clntip_expiry,
1341             sizeof (rfs4_clntip_t),
1342             TABSIZE,
1343             MAXTABSZ, 100);
1344         nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1345             "client_ip", clntip_hash,
1346             clntip_compare, clntip_mkkey,
1347             TRUE);
1348 
1349         nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
1350         nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1351             "OpenOwner",
1352             nsrv4->rfs4_openowner_cache_time,
1353             1,
1354             rfs4_openowner_create,
1355             rfs4_openowner_destroy,
1356             rfs4_openowner_expiry,
1357             sizeof (rfs4_openowner_t),
1358             TABSIZE,
1359             MAXTABSZ, 100);
1360         nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1361             "open_owner4", openowner_hash,
1362             openowner_compare,
1363             openowner_mkkey, TRUE);
1364 
1365         nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
1366         nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1367             "OpenStateID",
1368             nsrv4->rfs4_state_cache_time,
1369             3,
1370             rfs4_state_create,
1371             rfs4_state_destroy,
1372             rfs4_state_expiry,
1373             sizeof (rfs4_state_t),
1374             TABSIZE,
1375             MAXTABSZ, 100);
1376 
1377         /* CSTYLED */
1378         nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1379             "Openowner-File",
1380             state_owner_file_hash,
1381             state_owner_file_compare,
1382             state_owner_file_mkkey, TRUE);
1383 
1384         nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1385             "State-id", state_hash,
1386             state_compare, state_mkkey, FALSE);
1387 
1388         nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1389             "File", state_file_hash,
1390             state_file_compare, state_file_mkkey,
1391             FALSE);
1392 
1393         nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
1394         nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1395             "LockStateID",
1396             nsrv4->rfs4_lo_state_cache_time,
1397             2,
1398             rfs4_lo_state_create,
1399             rfs4_lo_state_destroy,
1400             rfs4_lo_state_expiry,
1401             sizeof (rfs4_lo_state_t),
1402             TABSIZE,
1403             MAXTABSZ, 100);
1404 
1405         /* CSTYLED */
1406         nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1407             "lockownerxstate",
1408             lo_state_lo_hash,
1409             lo_state_lo_compare,
1410             lo_state_lo_mkkey, TRUE);
1411 
1412         nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1413             "State-id",
1414             lo_state_hash, lo_state_compare,
1415             lo_state_mkkey, FALSE);
1416 
1417         nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1418 
1419         nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1420             "Lockowner",
1421             nsrv4->rfs4_lockowner_cache_time,
1422             2,
1423             rfs4_lockowner_create,
1424             rfs4_lockowner_destroy,
1425             rfs4_lockowner_expiry,
1426             sizeof (rfs4_lockowner_t),
1427             TABSIZE,
1428             MAXTABSZ, 100);
1429 
1430         nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1431             "lock_owner4", lockowner_hash,
1432             lockowner_compare,
1433             lockowner_mkkey, TRUE);
1434 
1435         /* CSTYLED */
1436         nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1437             "pid", pid_hash,
1438             pid_compare, pid_mkkey,
1439             FALSE);
1440 
1441         nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
1442         nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1443             "File",
1444             nsrv4->rfs4_file_cache_time,
1445             1,
1446             rfs4_file_create,
1447             rfs4_file_destroy,
1448             NULL,
1449             sizeof (rfs4_file_t),
1450             TABSIZE,
1451             MAXTABSZ, -1);
1452 
1453         nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1454             "Filehandle", file_hash,
1455             file_compare, file_mkkey, TRUE);
1456 
1457         nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
1458         /* CSTYLED */
1459         nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1460             "DelegStateID",
1461             nsrv4->rfs4_deleg_state_cache_time,
1462             2,
1463             rfs4_deleg_state_create,
1464             rfs4_deleg_state_destroy,
1465             rfs4_deleg_state_expiry,
1466             sizeof (rfs4_deleg_state_t),
1467             TABSIZE,
1468             MAXTABSZ, 100);
1469         nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1470             "DelegByFileClient",
1471             deleg_hash,
1472             deleg_compare,
1473             deleg_mkkey, TRUE);
1474 
1475         /* CSTYLED */
1476         nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1477             "DelegState",
1478             deleg_state_hash,
1479             deleg_state_compare,
1480             deleg_state_mkkey, FALSE);
1481 
1482         mutex_exit(&nsrv4->state_lock);
1483 
1484         /*
1485          * Init the stable storage.
1486          */
1487         rfs4_ss_init(nsrv4);




1488 }
1489 

1490 /*
1491  * Used at server shutdown to clean up all of NFSv4 server's zone structures
1492  * and state.
 *
 * Order of operations, as coded below: the delegation policy is set to
 * SRV_NEVER_DELEGATE, stable storage is cleaned up, and then, under
 * state_lock, the server instances, rfs4_findclient_lock and the state
 * database are destroyed and the cache times are zeroed.  If no state
 * database is attached the function returns right after the stable
 * storage cleanup.
 *
 * Takes no arguments; the per-zone nfs4_srv_t comes from nfs4_get_srv().
1493  */
1494 void
1495 rfs4_state_zone_fini(void)
1496 {
1497         rfs4_database_t *dbp;
1498         nfs4_srv_t *nsrv4;
1499         nsrv4 = nfs4_get_srv();
1500 
1501         rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1502 
1503         /*
1504          * Clean up any dangling stable storage structures BEFORE calling
1505          * rfs4_servinst_destroy_all() so there are no dangling structures
1506          * (i.e. the srvinsts are all cleared of danglers BEFORE they get
1507          * freed).
1508          */
1509         rfs4_ss_fini(nsrv4);
1510 
1511         mutex_enter(&nsrv4->state_lock);
1512 
        /* No state database attached: drop the lock and return. */
1513         if (nsrv4->nfs4_server_state == NULL) {
1514                 mutex_exit(&nsrv4->state_lock);
1515                 return;
1516         }
1517 
1518         /* destroy server instances and current instance ptr */
1519         rfs4_servinst_destroy_all(nsrv4);
1520 
1521         /* reset the "first NFSv4 request" status */
1522         nsrv4->seen_first_compound = 0;

1523 
        /* Detach the database from nsrv4 before shutting it down. */
1524         dbp = nsrv4->nfs4_server_state;
1525         nsrv4->nfs4_server_state = NULL;



1526 
1527         rw_destroy(&nsrv4->rfs4_findclient_lock);
1528 
1529         /* First stop all of the reaper threads in the database */
1530         rfs4_database_shutdown(dbp);
1531 
1532         /*
1533          * WARNING: There may be consumers of the rfs4 database still
1534          * active as we destroy these.  IF that's the case, consider putting
1535          * some of their _zone_fini()-like functions into the zsd key as
1536          * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions.  We can
1537          * maintain some ordering guarantees better that way.
1538          */
1539         /* Now destroy/release the database tables */
1540         rfs4_database_destroy(dbp);
1541 
1542         /* Reset the cache timers for next time */
1543         nsrv4->rfs4_client_cache_time = 0;
1544         nsrv4->rfs4_openowner_cache_time = 0;
1545         nsrv4->rfs4_state_cache_time = 0;
1546         nsrv4->rfs4_lo_state_cache_time = 0;
1547         nsrv4->rfs4_lockowner_cache_time = 0;
1548         nsrv4->rfs4_file_cache_time = 0;
1549         nsrv4->rfs4_deleg_state_cache_time = 0;
1550 
1551         mutex_exit(&nsrv4->state_lock);











1552 }
1553 
/*
 * Overlay giving field-level access to a clientid4: impl_id exposes a
 * 32-bit start_time and a 32-bit c_id, while id4 is the same storage
 * viewed as the protocol clientid4.
 */
1554 typedef union {
1555         struct {
1556                 uint32_t start_time;
1557                 uint32_t c_id;
1558         } impl_id;
1559         clientid4 id4;
1560 } cid;
1561 
1562 static int foreign_stateid(stateid_t *id);
1563 static int foreign_clientid(cid *cidp);
1564 static void embed_nodeid(cid *cidp);
1565 
1566 typedef union {
1567         struct {
1568                 uint32_t c_id;
1569                 uint32_t gen_num;
1570         } cv_impl;
1571         verifier4       confirm_verf;


1646          * If the sysadmin has used clear_locks for this
1647          * entry then forced_expire will be set and we
1648          * want this entry to be reaped. Or the entry
1649          * has exceeded its lease period.
1650          */
1651         cp_expired = (cp->rc_forced_expire ||
1652             (gethrestime_sec() - cp->rc_last_access
1653             > rfs4_lease_time));
1654 
1655         if (!cp->rc_ss_remove && cp_expired)
1656                 cp->rc_ss_remove = 1;
1657         return (cp_expired);
1658 }
1659 
1660 /*
1661  * Remove the leaf file from all distributed stable storage paths.
 *
 * The leaf name is read from cp->rc_ss_pn->leaf; rc_ss_pn is dereferenced
 * without a NULL check, so callers must only pass a client that has one.
1662  */
1663 static void
1664 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1665 {
1666         nfs4_srv_t *nsrv4;
1667         rfs4_servinst_t *sip;
1668         char *leaf = cp->rc_ss_pn->leaf;
1669 
1670         /*
1671          * since the state files are written to all DSS
1672          * paths we must remove this leaf file instance
1673          * from all server instances.
1674          */
1675 
1676         nsrv4 = nfs4_get_srv();
        /*
         * Walk from the current server instance back through the ->prev
         * links while holding servinst_lock.
         */
1677         mutex_enter(&nsrv4->servinst_lock);
1678         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1679                 /* remove the leaf file associated with this server instance */
1680                 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1681         }
1682         mutex_exit(&nsrv4->servinst_lock);
1683 }
1684 
1685 static void
1686 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1687 {
1688         int i, npaths = sip->dss_npaths;
1689 
1690         for (i = 0; i < npaths; i++) {
1691                 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1692                 char *path, *dir;
1693                 size_t pathlen;
1694 
1695                 /* the HA-NFSv4 path might have been failed-over away from us */
1696                 if (dss_path == NULL)
1697                         continue;
1698 
1699                 dir = dss_path->path;
1700 
1701                 /* allow 3 extra bytes for two '/' & a NUL */
1702                 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;


1730                 if (cp->rc_ss_remove)
1731                         rfs4_dss_remove_cpleaf(cp);
1732                 rfs4_ss_pnfree(cp->rc_ss_pn);
1733         }
1734 
1735         /* Free the client supplied client id */
1736         kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1737 
1738         if (cp->rc_sysidt != LM_NOSYSID)
1739                 lm_free_sysidt(cp->rc_sysidt);
1740 }
1741 
1742 static bool_t
1743 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1744 {
1745         rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1746         nfs_client_id4 *client = (nfs_client_id4 *)arg;
1747         struct sockaddr *ca;
1748         cid *cidp;
1749         scid_confirm_verf *scvp;
1750         nfs4_srv_t *nsrv4;
1751 
1752         nsrv4 = nfs4_get_srv();
1753 
1754         /* Get a clientid to give to the client */
1755         cidp = (cid *)&cp->rc_clientid;
1756         cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1757         cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1758 
1759         /* If we are booted as a cluster node, embed our nodeid */
1760         if (cluster_bootflags & CLUSTER_BOOTED)
1761                 embed_nodeid(cidp);
1762 
1763         /* Allocate and copy client's client id value */
1764         cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1765         cp->rc_nfs_client.id_len = client->id_len;
1766         bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1767         cp->rc_nfs_client.verifier = client->verifier;
1768 
1769         /* Copy client's IP address */
1770         ca = client->cl_addr;
1771         if (ca->sa_family == AF_INET)
1772                 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1773         else if (ca->sa_family == AF_INET6)
1774                 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1775         cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1776 


1794 
1795         cp->rc_cr_set = NULL;
1796 
1797         cp->rc_sysidt = LM_NOSYSID;
1798 
1799         list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1800             offsetof(rfs4_openowner_t, ro_node));
1801 
1802         /* set up the callback control structure */
1803         cp->rc_cbinfo.cb_state = CB_UNINIT;
1804         mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1805         cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1806         cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1807 
1808         /*
1809          * Associate the client_t with the current server instance.
1810          * The hold is solely to satisfy the calling requirement of
1811          * rfs4_servinst_assign(). In this case it's not strictly necessary.
1812          */
1813         rfs4_dbe_hold(cp->rc_dbe);
1814         rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1815         rfs4_dbe_rele(cp->rc_dbe);
1816 
1817         return (TRUE);
1818 }
1819 
1820 /*
1821  * Caller wants to generate/update the setclientid_confirm verifier
1822  * associated with a client.  This is done during the SETCLIENTID
1823  * processing.
 *
 * The update consists of incrementing the gen_num field of
 * cp->rc_confirm_verf, accessed through the scid_confirm_verf overlay.
 * No lock is taken in this function.
1824  */
1825 void
1826 rfs4_client_scv_next(rfs4_client_t *cp)
1827 {
1828         scid_confirm_verf *scvp;
1829 
1830         /* Init the value for the SETCLIENTID_CONFIRM verifier */
1831         scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1832         scvp->cv_impl.gen_num++;
1833 }
1834 
/*
 * Drop a hold on a client by calling rfs4_dbe_rele() on its database
 * entry (cp->rc_dbe).
 */
1835 void
1836 rfs4_client_rele(rfs4_client_t *cp)
1837 {
1838         rfs4_dbe_rele(cp->rc_dbe);
1839 }
1840 
/*
 * Search the "nfs_client_id4" index for the client described by `client'.
 * `client' is passed to rfs4_dbsearch() both as the search key and as the
 * creation argument, and `create' is handed through unchanged.
 *
 * When oldcp is non-NULL, rfs4_findclient_lock is taken as writer and
 * oldcp's database entry is hidden for the duration of the search and
 * unhidden afterwards; otherwise the lock is taken as reader.
 *
 * Returns whatever rfs4_dbsearch() returned.
 */
1841 rfs4_client_t *
1842 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1843 {
1844         rfs4_client_t *cp;
1845         nfs4_srv_t *nsrv4;
1846         nsrv4 = nfs4_get_srv();
1847 
1848 
1849         if (oldcp) {
1850                 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1851                 rfs4_dbe_hide(oldcp->rc_dbe);
1852         } else {
1853                 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1854         }
1855 
1856         cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1857             create, (void *)client, RFS4_DBS_VALID);
1858 
1859         if (oldcp)
1860                 rfs4_dbe_unhide(oldcp->rc_dbe);
1861 
1862         rw_exit(&nsrv4->rfs4_findclient_lock);
1863 
1864         return (cp);
1865 }
1866 
/*
 * Search the "client_id" index for the client with the given clientid4.
 * The search is made with create set to FALSE, under rfs4_findclient_lock
 * held as reader.
 *
 * Returns NULL when:
 *  - the system is booted as a cluster node and foreign_clientid() reports
 *    true for this id;
 *  - the search finds nothing; or
 *  - the entry found has rc_need_confirm set and find_unconfirmed is
 *    FALSE (the entry is released via rfs4_client_rele() first).
 * Otherwise returns the entry found.
 */
1867 rfs4_client_t *
1868 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1869 {
1870         rfs4_client_t *cp;
1871         bool_t create = FALSE;
1872         cid *cidp = (cid *)&clientid;
1873         nfs4_srv_t *nsrv4 = nfs4_get_srv();
1874 
1875         /* If we're a cluster and the nodeid isn't right, short-circuit */
1876         if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1877                 return (NULL);
1878 
1879         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1880 
1881         cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1882             &create, NULL, RFS4_DBS_VALID);
1883 
1884         rw_exit(&nsrv4->rfs4_findclient_lock);
1885 
        /* Hide unconfirmed clients from callers that did not ask for them. */
1886         if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1887                 rfs4_client_rele(cp);
1888                 return (NULL);
1889         } else {
1890                 return (cp);
1891         }
1892 }
1893 
1894 static uint32_t
1895 clntip_hash(void *key)
1896 {
1897         struct sockaddr *addr = key;
1898         int i, len = 0;
1899         uint32_t hash = 0;
1900         char *ptr;
1901 
1902         if (addr->sa_family == AF_INET) {
1903                 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1904                 len = sizeof (struct in_addr);


1972 {
1973         rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1974         struct sockaddr *ca = (struct sockaddr *)arg;
1975 
1976         /* Copy client's IP address */
1977         if (ca->sa_family == AF_INET)
1978                 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1979         else if (ca->sa_family == AF_INET6)
1980                 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1981         else
1982                 return (FALSE);
1983         cp->ri_no_referrals = 1;
1984 
1985         return (TRUE);
1986 }
1987 
/*
 * Search the "client_ip" index for the entry keyed by `addr'.  `addr' is
 * passed to rfs4_dbsearch() both as the key and as the creation argument,
 * and `create' is handed through unchanged.  The search runs with
 * rfs4_findclient_lock held as reader.
 *
 * Returns whatever rfs4_dbsearch() returned.
 */
1988 rfs4_clntip_t *
1989 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1990 {
1991         rfs4_clntip_t *cp;
1992         nfs4_srv_t *nsrv4;
1993 
1994         nsrv4 = nfs4_get_srv();
1995 
1996         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1997 
1998         cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1999             create, addr, RFS4_DBS_VALID);
2000 
2001         rw_exit(&nsrv4->rfs4_findclient_lock);
2002 
2003         return (cp);
2004 }
2005 
/*
 * Find the "client_ip" entry for `addr' (search made with create set to
 * FALSE) and, if one exists, invalidate its database entry and release
 * the reference obtained by the search.  Does nothing when no entry is
 * found.  rfs4_findclient_lock is held as reader across the search, the
 * invalidate and the release.
 */
2006 void
2007 rfs4_invalidate_clntip(struct sockaddr *addr)
2008 {
2009         rfs4_clntip_t *cp;
2010         bool_t create = FALSE;
2011         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2012 
2013         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2014 
2015         cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2016             &create, NULL, RFS4_DBS_VALID);
2017         if (cp == NULL) {
2018                 rw_exit(&nsrv4->rfs4_findclient_lock);
2019                 return;
2020         }
2021         rfs4_dbe_invalidate(cp->ri_dbe);
2022         rfs4_dbe_rele(cp->ri_dbe);
2023 
2024         rw_exit(&nsrv4->rfs4_findclient_lock);
2025 }
2026 
2027 bool_t
2028 rfs4_lease_expired(rfs4_client_t *cp)
2029 {
2030         bool_t rc;
2031 
2032         rfs4_dbe_lock(cp->rc_dbe);
2033 
2034         /*
2035          * If the admin has executed clear_locks for this
2036          * client id, force expire will be set, so no need
2037          * to calculate anything because it's "outa here".
2038          */
2039         if (cp->rc_forced_expire) {
2040                 rc = TRUE;
2041         } else {
2042                 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2043         }
2044 


2152 
2153         /* Free the lock owner id */
2154         kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2155 }
2156 
/*
 * Drop a hold on an open owner by calling rfs4_dbe_rele() on its database
 * entry (oo->ro_dbe).
 */
2157 void
2158 rfs4_openowner_rele(rfs4_openowner_t *oo)
2159 {
2160         rfs4_dbe_rele(oo->ro_dbe);
2161 }
2162 
2163 static bool_t
2164 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2165 {
2166         rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2167         rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2168         open_owner4 *openowner = &argp->ro_owner;
2169         seqid4 seqid = argp->ro_open_seqid;
2170         rfs4_client_t *cp;
2171         bool_t create = FALSE;
2172         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2173 
2174         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2175 
2176         cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2177             &openowner->clientid,
2178             &create, NULL, RFS4_DBS_VALID);
2179 
2180         rw_exit(&nsrv4->rfs4_findclient_lock);
2181 
2182         if (cp == NULL)
2183                 return (FALSE);
2184 
2185         oo->ro_reply_fh.nfs_fh4_len = 0;
2186         oo->ro_reply_fh.nfs_fh4_val = NULL;
2187 
2188         oo->ro_owner.clientid = openowner->clientid;
2189         oo->ro_owner.owner_val =
2190             kmem_alloc(openowner->owner_len, KM_SLEEP);
2191 
2192         bcopy(openowner->owner_val,
2193             oo->ro_owner.owner_val, openowner->owner_len);
2194 
2195         oo->ro_owner.owner_len = openowner->owner_len;
2196 
2197         oo->ro_need_confirm = TRUE;
2198 
2199         rfs4_sw_init(&oo->ro_sw);
2200 


2202         bzero(&oo->ro_reply, sizeof (nfs_resop4));
2203         oo->ro_client = cp;
2204         oo->ro_cr_set = NULL;
2205 
2206         list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2207             offsetof(rfs4_state_t, rs_node));
2208 
2209         /* Insert openowner into client's open owner list */
2210         rfs4_dbe_lock(cp->rc_dbe);
2211         list_insert_tail(&cp->rc_openownerlist, oo);
2212         rfs4_dbe_unlock(cp->rc_dbe);
2213 
2214         return (TRUE);
2215 }
2216 
/*
 * Search the "open_owner4" index for `openowner'; `create' is handed
 * through to rfs4_dbsearch() unchanged.
 *
 * The creation argument is a stack rfs4_openowner_t in which only ro_owner
 * and ro_open_seqid are filled in; it merely carries the owner and the
 * seqid to the table's create callback and is not a usable entry itself.
 *
 * Returns whatever rfs4_dbsearch() returned.
 */
2217 rfs4_openowner_t *
2218 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2219 {
2220         rfs4_openowner_t *oo;
2221         rfs4_openowner_t arg;
2222         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2223 
2224         arg.ro_owner = *openowner;
2225         arg.ro_open_seqid = seqid;
2226         /* CSTYLED */
2227         oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2228             create, &arg, RFS4_DBS_VALID);
2229 
2230         return (oo);
2231 }
2232 
/*
 * Increment the open owner's ro_open_seqid while holding the lock on its
 * database entry.
 */
2233 void
2234 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2235 {
2236 
2237         rfs4_dbe_lock(oo->ro_dbe);
2238 
2239         oo->ro_open_seqid++;
2240 
2241         rfs4_dbe_unlock(oo->ro_dbe);
2242 }
2243 
2244 void
2245 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2246 {
2247 


2350 }
2351 
/*
 * Expiry callback registered for the "Lockowner" table.  Always returns
 * TRUE; u_entry is not examined.
 */
2352 /* ARGSUSED */
2353 static bool_t
2354 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2355 {
2356         /*
2357          * Since expiry is called with no other references on
2358          * this struct, go ahead and have it removed.
2359          */
2360         return (TRUE);
2361 }
2362 
/*
 * Create callback registered for the "Lockowner" table.
 *
 * u_entry is the new rfs4_lockowner_t to fill in and arg is the
 * lock_owner4 supplied by the caller of the search.  The owning client is
 * looked up by lockowner->clientid in the "client_id" index (create set
 * to FALSE, rfs4_findclient_lock held as reader).
 *
 * Returns FALSE, leaving the entry untouched, when no such client is
 * found; otherwise fills in the entry and returns TRUE.
 */
2363 static bool_t
2364 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2365 {
2366         rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2367         lock_owner4 *lockowner = (lock_owner4 *)arg;
2368         rfs4_client_t *cp;
2369         bool_t create = FALSE;
2370         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2371 
2372         rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2373 
2374         cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2375             &lockowner->clientid,
2376             &create, NULL, RFS4_DBS_VALID);
2377 
2378         rw_exit(&nsrv4->rfs4_findclient_lock);
2379 
2380         if (cp == NULL)
2381                 return (FALSE);
2382 
        /*
         * The client pointer from the search is stored, not released here.
         * NOTE(review): presumably the matching release happens when the
         * lock owner is destroyed -- confirm.
         */
2383         /* Reference client */
2384         lo->rl_client = cp;
2385         lo->rl_owner.clientid = lockowner->clientid;
        /* Keep a private copy of the owner bytes. */
2386         lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2387         bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2388             lockowner->owner_len);
2389         lo->rl_owner.owner_len = lockowner->owner_len;
        /* The database entry id doubles as the lock owner's pid. */
2390         lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2391 
2392         return (TRUE);
2393 }
2394 
/*
 * Search the "lock_owner4" index for `lockowner'.  `lockowner' is passed
 * to rfs4_dbsearch() both as the key and as the creation argument, and
 * `create' is handed through unchanged.
 *
 * Returns whatever rfs4_dbsearch() returned.
 */
2395 rfs4_lockowner_t *
2396 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2397 {
2398         rfs4_lockowner_t *lo;
2399         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2400 
2401         /* CSTYLED */
2402         lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2403             create, lockowner, RFS4_DBS_VALID);
2404 
2405         return (lo);
2406 }
2407 
/*
 * Search the "pid" index of the lock owner table.  The pid value itself,
 * cast to a pointer, is used as the key; the search is made with create
 * set to FALSE.
 *
 * Returns whatever rfs4_dbsearch() returned.
 */
2408 rfs4_lockowner_t *
2409 rfs4_findlockowner_by_pid(pid_t pid)
2410 {
2411         rfs4_lockowner_t *lo;
2412         bool_t create = FALSE;
2413         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2414 
2415         lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2416             (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2417 
2418         return (lo);
2419 }
2420 
2421 
/*
 * Hash callback for the "Filehandle" index: hashes the key pointer itself
 * with ADDRHASH().  The key is the pointer produced by file_mkkey().
 */
2422 static uint32_t
2423 file_hash(void *key)
2424 {
2425         return (ADDRHASH(key));
2426 }
2427 
/*
 * Key-extraction callback for the "Filehandle" index: the key of an
 * rfs4_file_t is its vnode pointer, rf_vp.
 */
2428 static void *
2429 file_mkkey(rfs4_entry_t u_entry)
2430 {
2431         rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2432 
2433         return (fp->rf_vp);
2434 }
2435 


2506 
2507         mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2508         cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2509 
2510         fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2511 
2512         rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2513 
2514         mutex_enter(&vp->v_vsd_lock);
2515         VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2516         mutex_exit(&vp->v_vsd_lock);
2517 
2518         return (TRUE);
2519 }
2520 
/*
 * Find the rfs4_file_t for a vnode.
 *
 * When *create is TRUE the "Filehandle" index is searched with vp as the
 * key and {vp, fh} as the creation argument.
 *
 * Otherwise the database is not searched: the rfs4_file_t is read from
 * the vnode's VSD slot (nfs4_srv_vkey) under v_vsd_lock.  If the entry is
 * invalid or has a reference count of zero, NULL is returned; if usable,
 * a hold is taken on it before it is returned.
 *
 * Returns the entry, or NULL when none was found.
 */
2521 rfs4_file_t *
2522 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2523 {
2524         rfs4_file_t *fp;
2525         rfs4_fcreate_arg arg;
2526         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2527 
2528         arg.vp = vp;
2529         arg.fh = fh;
2530 
2531         if (*create == TRUE)
2532                 /* CSTYLED */
2533                 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2534                     &arg, RFS4_DBS_VALID);
2535         else {
2536                 mutex_enter(&vp->v_vsd_lock);
2537                 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2538                 if (fp) {
                        /* Validity check and hold are done under the dbe lock. */
2539                         rfs4_dbe_lock(fp->rf_dbe);
2540                         if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2541                             (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2542                                 rfs4_dbe_unlock(fp->rf_dbe);
2543                                 fp = NULL;
2544                         } else {
2545                                 rfs4_dbe_hold(fp->rf_dbe);
2546                                 rfs4_dbe_unlock(fp->rf_dbe);
2547                         }
2548                 }
2549                 mutex_exit(&vp->v_vsd_lock);
2550         }
2551         return (fp);
2552 }
2553 
2554 /*
2555  * Find a file in the db and once it is located, take the rw lock.
2556  * Need to check the vnode pointer and if it does not exist (it was
2557  * removed between the db location and check) redo the find.  This
2558  * assumes that a file struct that has a NULL vnode pointer is marked
2559  * as 'invalid' and will not be found in the db the second time
2560  * around.
 *
 * On a non-NULL return the caller holds a reference on the entry and
 * owns rf_file_rwlock as writer.
 *
 * When *create is FALSE on entry the entry is taken from the vnode's VSD
 * slot (nfs4_srv_vkey) rather than from the database, and a NULL rf_vp
 * yields NULL with no retry.  When *create is TRUE on entry the database
 * is searched, and a NULL rf_vp causes the search to be repeated with
 * *create restored to its original value.
2561  */
2562 rfs4_file_t *
2563 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2564 {
2565         rfs4_file_t *fp;
2566         rfs4_fcreate_arg arg;
2567         bool_t screate = *create;
2568         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2569 
2570         if (screate == FALSE) {
2571                 mutex_enter(&vp->v_vsd_lock);
2572                 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2573                 if (fp) {
2574                         rfs4_dbe_lock(fp->rf_dbe);
2575                         if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2576                             (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2577                                 rfs4_dbe_unlock(fp->rf_dbe);
2578                                 mutex_exit(&vp->v_vsd_lock);
2579                                 fp = NULL;
2580                         } else {
                                /*
                                 * Take the hold first, then drop both the
                                 * dbe lock and v_vsd_lock before acquiring
                                 * the file rwlock.
                                 */
2581                                 rfs4_dbe_hold(fp->rf_dbe);
2582                                 rfs4_dbe_unlock(fp->rf_dbe);
2583                                 mutex_exit(&vp->v_vsd_lock);
2584                                 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2585                                 if (fp->rf_vp == NULL) {
2586                                         rw_exit(&fp->rf_file_rwlock);
2587                                         rfs4_file_rele(fp);
2588                                         fp = NULL;
2589                                 }
2590                         }
2591                 } else {
2592                         mutex_exit(&vp->v_vsd_lock);
2593                 }
2594         } else {
2595 retry:
2596                 arg.vp = vp;
2597                 arg.fh = fh;
2598 
2599                 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2600                     create, &arg, RFS4_DBS_VALID);
2601                 if (fp != NULL) {
2602                         rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2603                         if (fp->rf_vp == NULL) {
2604                                 rw_exit(&fp->rf_file_rwlock);
2605                                 rfs4_file_rele(fp);
                                /* Restore the caller's request before retrying. */
2606                                 *create = screate;
2607                                 goto retry;
2608                         }
2609                 }
2610         }
2611 
2612         return (fp);
2613 }
2614 
2615 static uint32_t
2616 lo_state_hash(void *key)
2617 {
2618         stateid_t *id = key;
2619 
2620         return (id->bits.ident+id->bits.pid);


2735         list_insert_tail(&sp->rs_lostatelist, lsp);
2736         rfs4_dbe_hold(sp->rs_dbe);
2737         rfs4_dbe_unlock(sp->rs_dbe);
2738 
2739         return (TRUE);
2740 }
2741 
/*
 * Drop a hold on a lock state.  When unlock_fp is TRUE the rf_file_rwlock
 * of the associated file (lsp->rls_state->rs_finfo) is exited first.
 */
2742 void
2743 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2744 {
2745         if (unlock_fp == TRUE)
2746                 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2747         rfs4_dbe_rele(lsp->rls_dbe);
2748 }
2749 
/*
 * Search the lock state table's "State-id" index for `id' (search made
 * with create set to FALSE).  When lock_fp is TRUE and an entry is found,
 * the rf_file_rwlock of the associated file is entered as reader before
 * returning.
 *
 * Returns whatever rfs4_dbsearch() returned.
 */
2750 static rfs4_lo_state_t *
2751 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2752 {
2753         rfs4_lo_state_t *lsp;
2754         bool_t create = FALSE;
2755         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2756 
2757         lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2758             &create, NULL, RFS4_DBS_VALID);
2759         if (lock_fp == TRUE && lsp != NULL)
2760                 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2761 
2762         return (lsp);
2763 }
2764 
2765 
/*
 * Hash callback for the "lockownerxstate" index.  The key is an
 * rfs4_lo_state_t; the hash combines the addresses held in its rls_locker
 * and rls_state fields.
 */
2766 static uint32_t
2767 lo_state_lo_hash(void *key)
2768 {
2769         rfs4_lo_state_t *lsp = key;
2770 
2771         return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2772 }
2773 
/*
 * Compare callback for the "lockownerxstate" index.  An entry matches the
 * key when both the rls_locker and rls_state pointers are equal; no other
 * field of the key is examined.
 */
2774 static bool_t
2775 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2776 {
2777         rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2778         rfs4_lo_state_t *keyp = key;
2779 
2780         return (keyp->rls_locker == lsp->rls_locker &&
2781             keyp->rls_state == lsp->rls_state);
2782 }
2783 
/*
 * Key-extraction callback for the "lockownerxstate" index: the entry
 * itself serves as the key.
 */
2784 static void *
2785 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2786 {
2787         return (u_entry);
2788 }
2789 
/*
 * Search the "lockownerxstate" index for the lock state belonging to lock
 * owner `lo' and open state `sp'; `create' is handed through to
 * rfs4_dbsearch() unchanged.
 *
 * The key (also used as the creation argument) is a stack rfs4_lo_state_t
 * in which only rls_locker and rls_state are filled in.
 *
 * Returns whatever rfs4_dbsearch() returned.
 */
2790 rfs4_lo_state_t *
2791 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2792     bool_t *create)
2793 {
2794         rfs4_lo_state_t *lsp;
2795         rfs4_lo_state_t arg;
2796         nfs4_srv_t *nsrv4 = nfs4_get_srv();
2797 
2798         arg.rls_locker = lo;
2799         arg.rls_state = sp;
2800 
2801         lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2802             &arg, create, &arg, RFS4_DBS_VALID);
2803 
2804         return (lsp);
2805 }
2806 
/*
 * Build and return (by value) a stateid_t for the database entry id
 * `eid'.  boottime is taken from the zone's rfs4_start_time and ident from
 * eid; chgseq, type and pid start at zero.  clnodeid is the cluster node
 * id when booted as a cluster node, otherwise 0.
 */
2807 static stateid_t
2808 get_stateid(id_t eid)
2809 {
2810         stateid_t id;
2811         nfs4_srv_t *nsrv4;
2812 
2813         nsrv4 = nfs4_get_srv();
2814 
2815         id.bits.boottime = nsrv4->rfs4_start_time;
2816         id.bits.ident = eid;
2817         id.bits.chgseq = 0;
2818         id.bits.type = 0;
2819         id.bits.pid = 0;
2820 
2821         /*
2822          * If we are booted as a cluster node, embed our nodeid.
2823          * We've already done sanity checks in rfs4_client_create() so no
2824          * need to repeat them here.
2825          */
2826         id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2827             clconf_get_nodeid() : 0;
2828 
2829         return (id);
2830 }
2831 
2832 /*
2833  * For use only when booted as a cluster node.
2834  * Returns TRUE if the embedded nodeid indicates that this stateid was
2835  * generated on another node.


3051 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3052 {
3053         rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3054 
3055         /* return delegation if necessary */
3056         rfs4_return_deleg(dsp, FALSE);
3057 
3058         /* Were done with the file */
3059         rfs4_file_rele(dsp->rds_finfo);
3060         dsp->rds_finfo = NULL;
3061 
3062         /* And now with the openowner */
3063         rfs4_client_rele(dsp->rds_client);
3064         dsp->rds_client = NULL;
3065 }
3066 
3067 rfs4_deleg_state_t *
3068 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3069 {
3070         nfs4_srv_t *srv = nfs4_get_srv();
3071         rfs4_deleg_state_t tmpl;
3072 
3073         /* The lookup template is the (client, file) pair taken from sp. */
3074         tmpl.rds_finfo = sp->rs_finfo;
3075         tmpl.rds_client = sp->rs_owner->ro_client;
3076 
3077         /* tmpl is handed to rfs4_dbsearch() as both key and argument. */
3078         return ((rfs4_deleg_state_t *)rfs4_dbsearch(srv->rfs4_deleg_idx,
3079             &tmpl, create, &tmpl, RFS4_DBS_VALID));
3080 }
3081 
3082 rfs4_deleg_state_t *
3083 rfs4_finddelegstate(stateid_t *id)
3084 {
3085         nfs4_srv_t *srv = nfs4_get_srv();
3086         bool_t no_create = FALSE;
3087 
3088         /*
3089          * Search by stateid; the create flag is FALSE and no arg is given.
3090          */
3091         return ((rfs4_deleg_state_t *)rfs4_dbsearch(srv->rfs4_deleg_state_idx,
3092             id, &no_create, NULL, RFS4_DBS_VALID));
3093 }
3094 
3095 void
3096 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3097 {
3098         rfs4_dbe_rele(dsp->rds_dbe);    /* hand the state's rds_dbe to rele */
3099 }
3100 
3101 void
3102 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3103 {
3104 
3105         rfs4_dbe_lock(lsp->rls_dbe);
3106 
3107         /*
3108          * If we are skipping sequence id checking, this means that
3109          * this is the first lock request and therefore the sequence
3110          * id does not need to be updated.  This only happens on the


3189         if (sp->rs_closed == TRUE)
3190                 return (FALSE);
3191 
3192         return (fp == sp->rs_finfo);
3193 }
3194 
3195 static void *
3196 state_file_mkkey(rfs4_entry_t u_entry)
3197 {
3198         /* The rs_finfo of the state entry is handed back as the key. */
3199 
3200         return (((rfs4_state_t *)u_entry)->rs_finfo);
3201 }
3202 
3203 rfs4_state_t *
3204 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3205     bool_t *create)
3206 {
3207         nfs4_srv_t *srv = nfs4_get_srv();
3208         rfs4_state_t tmpl;
3209 
3210         /* Only the (open owner, file) pair of the template is filled in. */
3211         tmpl.rs_finfo = fp;
3212         tmpl.rs_owner = oo;
3213 
3214         /* tmpl is handed to rfs4_dbsearch() as both key and argument. */
3215         return ((rfs4_state_t *)rfs4_dbsearch(
3216             srv->rfs4_state_owner_file_idx, &tmpl, create, &tmpl,
3217             RFS4_DBS_VALID));
3218 }
3219 
3220 /* Look up ANY one state struct that refers to the file fp. */
3221 static rfs4_state_t *
3222 rfs4_findstate_by_file(rfs4_file_t *fp)
3223 {
3224         nfs4_srv_t *srv = nfs4_get_srv();
3225         bool_t no_create = FALSE;
3226         rfs4_state_t *found = (rfs4_state_t *)rfs4_dbsearch(
3227             srv->rfs4_state_file_idx, fp, &no_create, fp, RFS4_DBS_VALID);
3228         return (found);
3229 }
3230 
3231 static bool_t
3232 rfs4_state_expiry(rfs4_entry_t u_entry)
3233 {
3234         rfs4_state_t *state = (rfs4_state_t *)u_entry;
3235 
3236         /*
3237          * Invalid entries, and closed ones whose timerele is over a lease old.
3238          */
3239         if (rfs4_dbe_is_invalid(state->rs_dbe) ||
3240             (state->rs_closed == TRUE && gethrestime_sec() -
3241             rfs4_dbe_get_timerele(state->rs_dbe) > rfs4_lease_time))
3242                 return (TRUE);
3243 
3244         return (gethrestime_sec() -
3245             state->rs_owner->ro_client->rc_last_access > rfs4_lease_time);
3246 }
3247 


3258         sp->rs_stateid.bits.type = OPENID;
3259         sp->rs_owner = oo;
3260         sp->rs_finfo = fp;
3261 
3262         list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3263             offsetof(rfs4_lo_state_t, rls_node));
3264 
3265         /* Insert state on per open owner's list */
3266         rfs4_dbe_lock(oo->ro_dbe);
3267         list_insert_tail(&oo->ro_statelist, sp);
3268         rfs4_dbe_unlock(oo->ro_dbe);
3269 
3270         return (TRUE);
3271 }
3272 
3273 static rfs4_state_t *
3274 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3275 {
3276         nfs4_srv_t *srv = nfs4_get_srv();
3277         bool_t no_create = FALSE;
3278         rfs4_state_t *found = (rfs4_state_t *)rfs4_dbsearch(
3279             srv->rfs4_state_idx, id, &no_create, NULL, find_invalid);
3280 
3281         /* With lock_fp, a found state comes back with rf_file_rwlock read-held. */
3282         if (found == NULL || lock_fp != TRUE)
3283                 return (found);
3284         rw_enter(&found->rs_finfo->rf_file_rwlock, RW_READER);
3285         return (found);
3286 }
3287 
3288 void
3289 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3290     cred_t *cr)
3291 {
3292         /* Remove the associated lo_state owners */
3293         if (!lock_held)
3294                 rfs4_dbe_lock(sp->rs_dbe);
3295 
3296         /*
3297          * If refcnt == 0, the dbe is about to be destroyed.
3298          * lock state will be released by the reaper thread.
3299          */
3300 


3328 }
3329 
3330 void
3331 rfs4_client_close(rfs4_client_t *cp)
3332 {
3333         /* Mark client as going away: invalidate its dbe under the dbe lock. */
3334         rfs4_dbe_lock(cp->rc_dbe);
3335         rfs4_dbe_invalidate(cp->rc_dbe);
3336         rfs4_dbe_unlock(cp->rc_dbe);
3337         /* The dbe lock is dropped before the client's state is removed. */
3338         rfs4_client_state_remove(cp);
3339 
3340         /* Release the client */
3341         rfs4_client_rele(cp);
3342 }
3343 
3344 nfsstat4
3345 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3346 {
3347         nfs4_srv_t *srv = nfs4_get_srv();
3348         cid *id = (cid *)cp;
3349         nfsstat4 status = NFS4ERR_STALE_CLIENTID;
3350 
3351         /*
3352          * When booted as a cluster node, the nodeid embedded in the
3353          * clientid is checked first.  A clientid that was generated
3354          * on another node is reported to the client as stale.
3355          */
3356         if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_clientid(id))
3357                 return (status);
3358 
3359         /*
3360          * A clientid whose start time matches the server start time
3361          * is reported as EXPIRED, but only when this is NOT a
3362          * setclientid_confirm.  Every other clientid that gets here
3363          * is reported as stale.
3364          */
3365         if (setclid_confirm == 0 &&
3366             id->impl_id.start_time == srv->rfs4_start_time)
3367                 status = NFS4ERR_EXPIRED;
3368 
3369         return (status);
3370 }
3371 
3372 /*
3373  * This is used when a stateid has not been found amongst the
3374  * current server's state.  Check the stateid to see if it
3375  * was from this server instantiation or not.
3376  *
3377  * id is the stateid that could not be found and type is the kind
3378  * of stateid the caller expected.  Returns the nfsstat4 error that
3379  * should be reported for it.
3380  */
3381 static nfsstat4
3382 what_stateid_error(stateid_t *id, stateid_type_t type)
3383 {
3384         nfs4_srv_t *nsrv4;
3385 
3386         nsrv4 = nfs4_get_srv();
3387 
3388         /* If we are booted as a cluster node, was stateid locally generated? */
3389         if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3390                 return (NFS4ERR_STALE_STATEID);
3391 
3392         /* If types don't match then no use checking further */
3393         if (type != id->bits.type)
3394                 return (NFS4ERR_BAD_STATEID);
3395 
3396         /* From a different server instantiation, return STALE */
3397         if (id->bits.boottime != nsrv4->rfs4_start_time)
3398                 return (NFS4ERR_STALE_STATEID);
3399 
3400         /*
3401          * From this server but the state is most likely beyond lease
3402          * timeout: return NFS4ERR_EXPIRED.  However, there is the
3403          * case of a delegation stateid.  For delegations, there is a
3404          * case where the state can be removed without the client's
3405          * knowledge/consent: revocation.  In the case of delegation
3406          * revocation, the delegation state will be removed and will
3407          * not be found.  If the client does something like a
3408          * DELEGRETURN or even a READ/WRITE with a delegation stateid
3409          * that has been revoked, the server should return BAD_STATEID
3410          * instead of the more common EXPIRED error.
3411          *
3412          * The boottime is known to match here (the mismatch case
3413          * returned above), so it does not need to be tested again.
3414          */
3415         return (type == DELEGID ? NFS4ERR_BAD_STATEID : NFS4ERR_EXPIRED);
3416 }
3417 
3418 /*
3419  * Used later on to find the various state structs.  When called from
3420  * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3421  * taken (it is not needed), which helps the read/write path with
3422  * respect to performance.
3423  */
3424 static nfsstat4
3425 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3426     rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3427 {
3428         stateid_t *id = (stateid_t *)stateid;


3890 
3891 /*
3892  * This is a special function in that for the file struct provided the
3893  * server wants to remove/close all current state associated with the
3894  * file.  The prime use of this would be with OP_REMOVE to force the
3895  * release of state and particularly of file locks.
3896  *
3897  * There is an assumption that there are no delegations outstanding on
3898  * this file at this point.  The caller should have waited for those
3899  * to be returned or revoked.
3900  */
3901 void
3902 rfs4_close_all_state(rfs4_file_t *fp)
3903 {
3904         rfs4_state_t *sp;
3905 
3906         rfs4_dbe_lock(fp->rf_dbe);
3907 
3908 #ifdef DEBUG
3909         /* only applies when server is handing out delegations */
3910         if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3911                 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3912 #endif
3913 
3914         /* No delegations for this file */
3915         ASSERT(list_is_empty(&fp->rf_delegstatelist));
3916 
3917         /* Make sure that it can not be found */
3918         rfs4_dbe_invalidate(fp->rf_dbe);
3919 
3920         if (fp->rf_vp == NULL) {
3921                 rfs4_dbe_unlock(fp->rf_dbe);
3922                 return;
3923         }
3924         rfs4_dbe_unlock(fp->rf_dbe);
3925 
3926         /*
3927          * Hold as writer to prevent other server threads from
3928          * processing requests related to the file while all state is
3929          * being removed.
3930          */


4096                             OPEN_DELEGATE_WRITE) {
4097                                 (void) fem_uninstall(vp, deleg_wrops,
4098                                     (void *)fp);
4099                                 vn_open_downgrade(vp, FREAD|FWRITE);
4100                         }
4101                         mutex_enter(&vp->v_vsd_lock);
4102                         (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4103                         mutex_exit(&vp->v_vsd_lock);
4104                         VN_RELE(vp);
4105                         fp->rf_vp = NULL;
4106                 }
4107                 rfs4_dbe_invalidate(fp->rf_dbe);
4108         }
4109 }
4110 
4111 /*
4112  * Given a directory that is being unexported, release all server
4113  * state that refers to objects underneath that export.  The order
4114  * of the table walks is important: lock-owner state, then open
4115  * state, then delegation state and finally files.
4116  *
4117  * NFS zones note: nfs_export.c:unexport() calls this from a
4118  * thread in the global zone for NGZ data structures, so we
4119  * CANNOT use zone_getspecific anywhere in this code path.
4120  */
4121 void
4122 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4123 {
4124         nfs_globals_t *globals = ne->ne_globals;
4125         nfs4_srv_t *srv;
4126 
4127         /* The server instance is reached through ne, not the current zone. */
4128         ASSERT(globals->nfs_zoneid == exi->exi_zoneid);
4129         srv = globals->nfs4_srv;
4130 
4131         mutex_enter(&srv->state_lock);
4132 
4133         /* The tables are only walked when nfs4_server_state is set. */
4134         if (srv->nfs4_server_state != NULL) {
4135                 rfs4_dbe_walk(srv->rfs4_lo_state_tab,
4136                     rfs4_lo_state_walk_callout, exi);
4137                 rfs4_dbe_walk(srv->rfs4_state_tab,
4138                     rfs4_state_walk_callout, exi);
4139                 rfs4_dbe_walk(srv->rfs4_deleg_state_tab,
4140                     rfs4_deleg_state_walk_callout, exi);
4141                 rfs4_dbe_walk(srv->rfs4_file_tab,
4142                     rfs4_file_walk_callout, exi);
4143         }
4144 
4145         mutex_exit(&srv->state_lock);
4146 }