1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 #include <sys/systm.h>
27 #include <sys/kmem.h>
28 #include <sys/cmn_err.h>
29 #include <sys/atomic.h>
30 #include <sys/clconf.h>
31 #include <sys/cladm.h>
32 #include <sys/flock.h>
33 #include <nfs/export.h>
34 #include <nfs/nfs.h>
35 #include <nfs/nfs4.h>
36 #include <nfs/nfssys.h>
37 #include <nfs/lm.h>
38 #include <sys/pathname.h>
39 #include <sys/sdt.h>
40 #include <sys/nvpair.h>
41
/*
 * Globals defined outside this file.
 *
 * rfs4_start_time is set (or bumped) by rfs4_state_init() and is embedded
 * into every clientid handed out by rfs4_client_create().
 */
extern u_longlong_t nfs4_srv_caller_id;

extern time_t rfs4_start_time;
extern uint_t nfs4_srv_vkey;
46
/*
 * The two stateid values that ISSPECIAL() compares against.
 * special0 has every bit clear.
 * NOTE(review): presumably these are the NFSv4 protocol's "special"
 * stateids -- confirm against the protocol specification.
 */
stateid4 special0 = {
	0,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};

/* special1 has every bit set: 0xffffffff followed by twelve 0xff bytes. */
stateid4 special1 = {
	0xffffffff,
	{
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff
	}
};
60
61
/*
 * Non-zero when stateid4_cmp() reports a non-zero result for `id' against
 * either special0 or special1.  Note that `id' is evaluated twice, so it
 * must not have side effects.
 */
#define	ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
			stateid4_cmp(id, &special1))
64
/*
 * For embedding the cluster nodeid into our clientid.
 * NOTE(review): presumably consumed by embed_nodeid()/foreign_clientid(),
 * whose bodies are not visible here -- confirm the bit layout there.
 */
#define	CLUSTER_NODEID_SHIFT	24
#define	CLUSTER_MAX_NODEID	255
68
/* Debug knob; only present in DEBUG builds. */
#ifdef DEBUG
int rfs4_debug;
#endif

/* Passed to rfs4_database_create() when rfs4_state_init() builds the db. */
static uint32_t rfs4_database_debug = 0x00;
74
/* Forward declarations for the stable storage helpers defined below. */
static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf);
static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
79
80 /*
81 * Couple of simple init/destroy functions for a general waiter
82 */
83 void
84 rfs4_sw_init(rfs4_state_wait_t *swp)
85 {
86 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
87 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
88 swp->sw_active = FALSE;
89 swp->sw_wait_count = 0;
90 }
91
92 void
93 rfs4_sw_destroy(rfs4_state_wait_t *swp)
94 {
95 mutex_destroy(swp->sw_cv_lock);
104 swp->sw_wait_count++;
105 cv_wait(swp->sw_cv, swp->sw_cv_lock);
106 swp->sw_wait_count--;
107 }
108 ASSERT(swp->sw_active == FALSE);
109 swp->sw_active = TRUE;
110 mutex_exit(swp->sw_cv_lock);
111 }
112
113 void
114 rfs4_sw_exit(rfs4_state_wait_t *swp)
115 {
116 mutex_enter(swp->sw_cv_lock);
117 ASSERT(swp->sw_active == TRUE);
118 swp->sw_active = FALSE;
119 if (swp->sw_wait_count != 0)
120 cv_broadcast(swp->sw_cv);
121 mutex_exit(swp->sw_cv_lock);
122 }
123
/*
 * CPR callback id -- not related to v4 callbacks.
 * Assigned from callb_add() in rfs4_state_init() and handed back to
 * callb_delete() in rfs4_state_fini() when non-zero.
 */
static callb_id_t cpr_id = 0;
128
129 static void
130 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
131 {
132 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
133 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
134
135 if (sres->status == NFS4ERR_DENIED) {
136 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
137 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
138 }
139 }
140
141 static void
142 deep_lock_free(LOCK4res *res)
143 {
144 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
145
146 if (res->status == NFS4ERR_DENIED)
147 kmem_free(lo->owner_val, lo->owner_len);
148 }
149
150 static void
151 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
152 {
153 nfsace4 *sacep, *dacep;
154
155 if (sres->status != NFS4_OK) {
156 return;
157 }
158
159 dres->attrset = sres->attrset;
160
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
 * correctly (I hope) and will not change by much.  The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader/writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
267
/*
 * TABSIZE is passed to every rfs4_table_create() call in
 * rfs4_state_init().  DEBUG builds use a much smaller value than
 * non-DEBUG builds.
 */
#ifdef DEBUG
#define	TABSIZE 17
#else
#define	TABSIZE 2047
#endif

/* Turn an address into a hash value by dropping its low three bits. */
#define	ADDRHASH(key) ((unsigned long)(key) >> 3)
275
/* Used to serialize create/destroy of rfs4_server_state database */
kmutex_t	rfs4_state_lock;
/* NULL until rfs4_state_init() runs; reset to NULL by rfs4_state_fini() */
static rfs4_database_t *rfs4_server_state = NULL;

/* Used to serialize lookups of clientids */
static krwlock_t	rfs4_findclient_lock;

/*
 * For now this "table" is exposed so that the CPR callback
 * function can tromp through it..
 */
rfs4_table_t *rfs4_client_tab;

/*
 * Tables and their indices.  Each pointer is assigned once by
 * rfs4_state_init(): the *_tab handles come from rfs4_table_create()
 * and the *_idx handles from rfs4_index_create() on the matching table.
 */
static rfs4_index_t *rfs4_clientid_idx;
static rfs4_index_t *rfs4_nfsclnt_idx;
static rfs4_table_t *rfs4_clntip_tab;
static rfs4_index_t *rfs4_clntip_idx;
static rfs4_table_t *rfs4_openowner_tab;
static rfs4_index_t *rfs4_openowner_idx;
static rfs4_table_t *rfs4_state_tab;
static rfs4_index_t *rfs4_state_idx;
static rfs4_index_t *rfs4_state_owner_file_idx;
static rfs4_index_t *rfs4_state_file_idx;
static rfs4_table_t *rfs4_lo_state_tab;
static rfs4_index_t *rfs4_lo_state_idx;
static rfs4_index_t *rfs4_lo_state_owner_idx;
static rfs4_table_t *rfs4_lockowner_tab;
static rfs4_index_t *rfs4_lockowner_idx;
static rfs4_index_t *rfs4_lockowner_pid_idx;
static rfs4_table_t *rfs4_file_tab;
static rfs4_index_t *rfs4_file_idx;
static rfs4_table_t *rfs4_deleg_state_tab;
static rfs4_index_t *rfs4_deleg_idx;
static rfs4_index_t *rfs4_deleg_state_idx;
310
/*
 * Upper bound handed to rfs4_table_create() (1M).  The expansion is
 * parenthesized so that the macro behaves as a single value in any
 * expression, e.g. as a divisor or the right-hand side of `%'.
 */
#define	MAXTABSZ	(1024 * 1024)
312
/*
 * The values below are rfs4_lease_time units.
 * rfs4_state_init() multiplies each of them by rfs4_lease_time before
 * handing the result to rfs4_table_create().
 */

#ifdef DEBUG
#define	CLIENT_CACHE_TIME 1
#define	OPENOWNER_CACHE_TIME 1
#define	STATE_CACHE_TIME 1
#define	LO_STATE_CACHE_TIME 1
#define	LOCKOWNER_CACHE_TIME 1
#define	FILE_CACHE_TIME 3
#define	DELEG_STATE_CACHE_TIME 1
#else
#define	CLIENT_CACHE_TIME 10
#define	OPENOWNER_CACHE_TIME 5
#define	STATE_CACHE_TIME 1
#define	LO_STATE_CACHE_TIME 1
#define	LOCKOWNER_CACHE_TIME 3
#define	FILE_CACHE_TIME 40
#define	DELEG_STATE_CACHE_TIME 1
#endif


/*
 * Per-table cache times.  A value of 0 means "not set": rfs4_state_init()
 * then loads the matching *_CACHE_TIME default above.  rfs4_state_fini()
 * resets them to 0 (all except rfs4_clntip_cache_time, which
 * rfs4_state_init() always overwrites).
 */
static time_t rfs4_client_cache_time = 0;
static time_t rfs4_clntip_cache_time = 0;
static time_t rfs4_openowner_cache_time = 0;
static time_t rfs4_state_cache_time = 0;
static time_t rfs4_lo_state_cache_time = 0;
static time_t rfs4_lockowner_cache_time = 0;
static time_t rfs4_file_cache_time = 0;
static time_t rfs4_deleg_state_cache_time = 0;
342
/*
 * Forward declarations of the per-table create/destroy/expiry callbacks
 * and the per-index hash/compare/mkkey helpers.  rfs4_state_init() passes
 * them to rfs4_table_create() and rfs4_index_create().
 */
static bool_t rfs4_client_create(rfs4_entry_t, void *);
static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
static void rfs4_client_destroy(rfs4_entry_t);
static bool_t rfs4_client_expiry(rfs4_entry_t);
static uint32_t clientid_hash(void *);
static bool_t clientid_compare(rfs4_entry_t, void *);
static void *clientid_mkkey(rfs4_entry_t);
static uint32_t nfsclnt_hash(void *);
static bool_t nfsclnt_compare(rfs4_entry_t, void *);
static void *nfsclnt_mkkey(rfs4_entry_t);
static bool_t rfs4_clntip_expiry(rfs4_entry_t);
static void rfs4_clntip_destroy(rfs4_entry_t);
static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
static uint32_t clntip_hash(void *);
static bool_t clntip_compare(rfs4_entry_t, void *);
static void *clntip_mkkey(rfs4_entry_t);
static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
static void rfs4_openowner_destroy(rfs4_entry_t);
static bool_t rfs4_openowner_expiry(rfs4_entry_t);
688 cl_ss->ss_pn = rfs4_ss_movestate(
689 statedir, destdir, dep->d_name);
690 } else {
691 cl_ss->ss_pn = ss_pn;
692 }
693 insque(cl_ss, oldstate);
694 } else {
695 rfs4_ss_pnfree(ss_pn);
696 }
697 }
698 }
699
700 out:
701 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
702 VN_RELE(dvp);
703 if (dirt)
704 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
705 }
706
707 static void
708 rfs4_ss_init(void)
709 {
710 int npaths = 1;
711 char *default_dss_path = NFS4_DSS_VAR_DIR;
712
713 /* read the default stable storage state */
714 rfs4_dss_readstate(npaths, &default_dss_path);
715
716 rfs4_ss_enabled = 1;
717 }
718
719 static void
720 rfs4_ss_fini(void)
721 {
722 rfs4_servinst_t *sip;
723
724 mutex_enter(&rfs4_servinst_lock);
725 sip = rfs4_cur_servinst;
726 while (sip != NULL) {
727 rfs4_dss_clear_oldstate(sip);
728 sip = sip->next;
729 }
730 mutex_exit(&rfs4_servinst_lock);
731 }
732
733 /*
734 * Remove all oldstate files referenced by this servinst.
735 */
736 static void
737 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
738 {
739 rfs4_oldstate_t *os_head, *osp;
740
741 rw_enter(&sip->oldstate_lock, RW_WRITER);
742 os_head = sip->oldstate;
743
744 if (os_head == NULL) {
745 rw_exit(&sip->oldstate_lock);
746 return;
747 }
748
749 /* skip dummy entry */
750 osp = os_head->next;
754
755 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
756
757 if (osp->cl_id4.id_val)
758 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
759 rfs4_ss_pnfree(osp->ss_pn);
760
761 os_next = osp->next;
762 remque(osp);
763 kmem_free(osp, sizeof (rfs4_oldstate_t));
764 osp = os_next;
765 }
766
767 rw_exit(&sip->oldstate_lock);
768 }
769
770 /*
771 * Form the state and oldstate paths, and read in the stable storage files.
772 */
773 void
774 rfs4_dss_readstate(int npaths, char **paths)
775 {
776 int i;
777 char *state, *oldstate;
778
779 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
780 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
781
782 for (i = 0; i < npaths; i++) {
783 char *path = paths[i];
784
785 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
786 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
787
788 /*
789 * Populate the current server instance's oldstate list.
790 *
791 * 1. Read stable storage data from old state directory,
792 * leaving its contents alone.
793 *
794 * 2. Read stable storage data from state directory,
795 * and move the latter's contents to old state
796 * directory.
797 */
798 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL);
799 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate);
800 }
801
802 kmem_free(state, MAXPATHLEN);
803 kmem_free(oldstate, MAXPATHLEN);
804 }
805
806
807 /*
808 * Check if we are still in grace and if the client can be
809 * granted permission to perform reclaims.
810 */
811 void
812 rfs4_ss_chkclid(rfs4_client_t *cp)
813 {
814 rfs4_servinst_t *sip;
815
816 /*
817 * It should be sufficient to check the oldstate data for just
818 * this client's instance. However, since our per-instance
819 * client grouping is solely temporal, HA-NFSv4 RG failover
820 * might result in clients of the same RG being partitioned into
821 * separate instances.
822 *
823 * Until the client grouping is improved, we must check the
824 * oldstate data for all instances with an active grace period.
825 *
826 * This also serves as the mechanism to remove stale oldstate data.
827 * The first time we check an instance after its grace period has
828 * expired, the oldstate data should be cleared.
829 *
830 * Start at the current instance, and walk the list backwards
831 * to the first.
832 */
833 mutex_enter(&rfs4_servinst_lock);
834 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
835 rfs4_ss_chkclid_sip(cp, sip);
836
837 /* if the above check found this client, we're done */
838 if (cp->rc_can_reclaim)
839 break;
840 }
841 mutex_exit(&rfs4_servinst_lock);
842 }
843
844 static void
845 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
846 {
847 rfs4_oldstate_t *osp, *os_head;
848
849 /* short circuit everything if this server instance has no oldstate */
850 rw_enter(&sip->oldstate_lock, RW_READER);
851 os_head = sip->oldstate;
852 rw_exit(&sip->oldstate_lock);
853 if (os_head == NULL)
854 return;
855
856 /*
857 * If this server instance is no longer in a grace period then
858 * the client won't be able to reclaim. No further need for this
859 * instance's oldstate data, so it can be cleared.
860 */
861 if (!rfs4_servinst_in_grace(sip))
871 while (osp != os_head) {
872 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
873 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
874 osp->cl_id4.id_len) == 0) {
875 cp->rc_can_reclaim = 1;
876 break;
877 }
878 }
879 osp = osp->next;
880 }
881
882 rw_exit(&sip->oldstate_lock);
883 }
884
885 /*
886 * Place client information into stable storage: 1/3.
887 * First, generate the leaf filename, from the client's IP address and
888 * the server-generated short-hand clientid.
889 */
890 void
891 rfs4_ss_clid(rfs4_client_t *cp)
892 {
893 const char *kinet_ntop6(uchar_t *, char *, size_t);
894 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
895 struct sockaddr *ca;
896 uchar_t *b;
897
898 if (rfs4_ss_enabled == 0) {
899 return;
900 }
901
902 buf[0] = 0;
903
904 ca = (struct sockaddr *)&cp->rc_addr;
905
906 /*
907 * Convert the caller's IP address to a dotted string
908 */
909 if (ca->sa_family == AF_INET) {
910 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
911 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
912 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
913 } else if (ca->sa_family == AF_INET6) {
914 struct sockaddr_in6 *sin6;
915
916 sin6 = (struct sockaddr_in6 *)ca;
917 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
918 buf, INET6_ADDRSTRLEN);
919 }
920
921 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
922 (longlong_t)cp->rc_clientid);
923 rfs4_ss_clid_write(cp, leaf);
924 }
925
926 /*
927 * Place client information into stable storage: 2/3.
928 * DSS: distributed stable storage: the file may need to be written to
929 * multiple directories.
930 */
931 static void
932 rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
933 {
934 rfs4_servinst_t *sip;
935
936 /*
937 * It should be sufficient to write the leaf file to (all) DSS paths
938 * associated with just this client's instance. However, since our
939 * per-instance client grouping is solely temporal, HA-NFSv4 RG
940 * failover might result in us losing DSS data.
941 *
942 * Until the client grouping is improved, we must write the DSS data
943 * to all instances' paths. Start at the current instance, and
944 * walk the list backwards to the first.
945 */
946 mutex_enter(&rfs4_servinst_lock);
947 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
948 int i, npaths = sip->dss_npaths;
949
950 /* write the leaf file to all DSS paths */
951 for (i = 0; i < npaths; i++) {
952 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
953
954 /* HA-NFSv4 path might have been failed-away from us */
955 if (dss_path == NULL)
956 continue;
957
958 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
959 }
960 }
961 mutex_exit(&rfs4_servinst_lock);
962 }
963
964 /*
965 * Place client information into stable storage: 3/3.
966 * Write the stable storage data to the requested file.
967 */
968 static void
969 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
970 {
971 int ioflag;
972 int file_vers = NFS4_SS_VERSION;
973 size_t dirlen;
974 struct uio uio;
975 struct iovec iov[4];
976 char *dir;
977 rfs4_ss_pn_t *ss_pn;
978 vnode_t *vp;
979 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
980
981 /* allow 2 extra bytes for '/' & NUL */
1134 * for forced expiration
1135 */
1136 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1137 cp->rc_forced_expire = 1;
1138 }
1139 break;
1140
1141 default:
1142 /* force this assert to fail */
1143 ASSERT(clr->addr_type != clr->addr_type);
1144 }
1145 }
1146
1147 /*
1148 * This is called from nfssys() in order to clear server state
1149 * for the specified client IP Address.
1150 */
1151 void
1152 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1153 {
1154 (void) rfs4_dbe_walk(rfs4_client_tab, rfs4_client_scrub, clr);
1155 }
1156
/*
 * Used to initialize the NFSv4 server's state or database.  All of
 * the tables are created and timers are set. Only called when NFSv4
 * service is provided.
 *
 * The whole routine runs under rfs4_state_lock and is a no-op when
 * rfs4_server_state is already set.  In order it:
 *   - initialises rfs4_findclient_lock and advances rfs4_start_time,
 *   - creates a server instance (grace period not started),
 *   - registers the CPR callback,
 *   - loads default cache times for any that are still 0,
 *   - creates the database, then every table and its indices,
 *   - initialises stable storage and publishes rfs4_clear_client_state
 *     through rfs4_client_clrst.
 *
 * NOTE(review): in every rfs4_table_create() call below the argument
 * after the cache time equals the number of indices subsequently created
 * on that table -- presumably it is the index count; confirm against
 * rfs4_table_create().
 */
void
rfs4_state_init()
{
	int start_grace;
	extern boolean_t rfs4_cpr_callb(void *, int);
	char *dss_path = NFS4_DSS_VAR_DIR;
	time_t start_time;

	mutex_enter(&rfs4_state_lock);

	/*
	 * If the server state database has already been initialized,
	 * skip it
	 */
	if (rfs4_server_state != NULL) {
		mutex_exit(&rfs4_state_lock);
		return;
	}

	rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);

	/*
	 * Set the boot time.  If the server
	 * has been restarted quickly and has had the opportunity to
	 * service clients, then the start_time needs to be bumped
	 * regardless.  A small window but it exists...
	 */
	start_time = gethrestime_sec();
	if (rfs4_start_time < start_time)
		rfs4_start_time = start_time;
	else
		rfs4_start_time++;

	/* DSS: distributed stable storage: initialise served paths list */
	rfs4_dss_pathlist = NULL;

	/*
	 * Create the first server instance, or a new one if the server has
	 * been restarted; see above comments on rfs4_start_time.  Don't
	 * start its grace period; that will be done later, to maximise the
	 * clients' recovery window.
	 */
	start_grace = 0;
	rfs4_servinst_create(start_grace, 1, &dss_path);

	/* reset the "first NFSv4 request" status */
	rfs4_seen_first_compound = 0;

	/*
	 * Add a CPR callback so that we can update client
	 * access times to extend the lease after a suspend
	 * and resume (using the same class as rpcmod/connmgr)
	 */
	cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");

	/*
	 * set the various cache timers for table creation; a value that
	 * is already non-zero at this point is left as it is
	 */
	if (rfs4_client_cache_time == 0)
		rfs4_client_cache_time = CLIENT_CACHE_TIME;
	if (rfs4_openowner_cache_time == 0)
		rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
	if (rfs4_state_cache_time == 0)
		rfs4_state_cache_time = STATE_CACHE_TIME;
	if (rfs4_lo_state_cache_time == 0)
		rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
	if (rfs4_lockowner_cache_time == 0)
		rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
	if (rfs4_file_cache_time == 0)
		rfs4_file_cache_time = FILE_CACHE_TIME;
	if (rfs4_deleg_state_cache_time == 0)
		rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;

	/* Create the overall database to hold all server state */
	rfs4_server_state = rfs4_database_create(rfs4_database_debug);

	/* Now create the individual tables */

	/* Client table: indexed by nfs_client_id4 and by clientid */
	rfs4_client_cache_time *= rfs4_lease_time;
	rfs4_client_tab = rfs4_table_create(rfs4_server_state,
	    "Client",
	    rfs4_client_cache_time,
	    2,
	    rfs4_client_create,
	    rfs4_client_destroy,
	    rfs4_client_expiry,
	    sizeof (rfs4_client_t),
	    TABSIZE,
	    MAXTABSZ/8, 100);
	rfs4_nfsclnt_idx = rfs4_index_create(rfs4_client_tab,
	    "nfs_client_id4", nfsclnt_hash,
	    nfsclnt_compare, nfsclnt_mkkey,
	    TRUE);
	rfs4_clientid_idx = rfs4_index_create(rfs4_client_tab,
	    "client_id", clientid_hash,
	    clientid_compare, clientid_mkkey,
	    FALSE);

	/*
	 * Client IP table.  Unlike the other tables its cache time is a
	 * fixed number of seconds and is not scaled by rfs4_lease_time.
	 */
	rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
	rfs4_clntip_tab = rfs4_table_create(rfs4_server_state,
	    "ClntIP",
	    rfs4_clntip_cache_time,
	    1,
	    rfs4_clntip_create,
	    rfs4_clntip_destroy,
	    rfs4_clntip_expiry,
	    sizeof (rfs4_clntip_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	rfs4_clntip_idx = rfs4_index_create(rfs4_clntip_tab,
	    "client_ip", clntip_hash,
	    clntip_compare, clntip_mkkey,
	    TRUE);

	/* Open owner table */
	rfs4_openowner_cache_time *= rfs4_lease_time;
	rfs4_openowner_tab = rfs4_table_create(rfs4_server_state,
	    "OpenOwner",
	    rfs4_openowner_cache_time,
	    1,
	    rfs4_openowner_create,
	    rfs4_openowner_destroy,
	    rfs4_openowner_expiry,
	    sizeof (rfs4_openowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	rfs4_openowner_idx = rfs4_index_create(rfs4_openowner_tab,
	    "open_owner4", openowner_hash,
	    openowner_compare,
	    openowner_mkkey, TRUE);

	/* Open state table: three indices */
	rfs4_state_cache_time *= rfs4_lease_time;
	rfs4_state_tab = rfs4_table_create(rfs4_server_state,
	    "OpenStateID",
	    rfs4_state_cache_time,
	    3,
	    rfs4_state_create,
	    rfs4_state_destroy,
	    rfs4_state_expiry,
	    sizeof (rfs4_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	rfs4_state_owner_file_idx = rfs4_index_create(rfs4_state_tab,
	    "Openowner-File",
	    state_owner_file_hash,
	    state_owner_file_compare,
	    state_owner_file_mkkey, TRUE);

	rfs4_state_idx = rfs4_index_create(rfs4_state_tab,
	    "State-id", state_hash,
	    state_compare, state_mkkey, FALSE);

	rfs4_state_file_idx = rfs4_index_create(rfs4_state_tab,
	    "File", state_file_hash,
	    state_file_compare, state_file_mkkey,
	    FALSE);

	/* Lock state table */
	rfs4_lo_state_cache_time *= rfs4_lease_time;
	rfs4_lo_state_tab = rfs4_table_create(rfs4_server_state,
	    "LockStateID",
	    rfs4_lo_state_cache_time,
	    2,
	    rfs4_lo_state_create,
	    rfs4_lo_state_destroy,
	    rfs4_lo_state_expiry,
	    sizeof (rfs4_lo_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	rfs4_lo_state_owner_idx = rfs4_index_create(rfs4_lo_state_tab,
	    "lockownerxstate",
	    lo_state_lo_hash,
	    lo_state_lo_compare,
	    lo_state_lo_mkkey, TRUE);

	rfs4_lo_state_idx = rfs4_index_create(rfs4_lo_state_tab,
	    "State-id",
	    lo_state_hash, lo_state_compare,
	    lo_state_mkkey, FALSE);

	/* Lock owner table */
	rfs4_lockowner_cache_time *= rfs4_lease_time;

	rfs4_lockowner_tab = rfs4_table_create(rfs4_server_state,
	    "Lockowner",
	    rfs4_lockowner_cache_time,
	    2,
	    rfs4_lockowner_create,
	    rfs4_lockowner_destroy,
	    rfs4_lockowner_expiry,
	    sizeof (rfs4_lockowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	rfs4_lockowner_idx = rfs4_index_create(rfs4_lockowner_tab,
	    "lock_owner4", lockowner_hash,
	    lockowner_compare,
	    lockowner_mkkey, TRUE);

	rfs4_lockowner_pid_idx = rfs4_index_create(rfs4_lockowner_tab,
	    "pid", pid_hash,
	    pid_compare, pid_mkkey,
	    FALSE);

	/*
	 * File table.  This is the only table created with a NULL expiry
	 * callback and with -1 as its final argument.
	 */
	rfs4_file_cache_time *= rfs4_lease_time;
	rfs4_file_tab = rfs4_table_create(rfs4_server_state,
	    "File",
	    rfs4_file_cache_time,
	    1,
	    rfs4_file_create,
	    rfs4_file_destroy,
	    NULL,
	    sizeof (rfs4_file_t),
	    TABSIZE,
	    MAXTABSZ, -1);

	rfs4_file_idx = rfs4_index_create(rfs4_file_tab,
	    "Filehandle", file_hash,
	    file_compare, file_mkkey, TRUE);

	/* Delegation state table */
	rfs4_deleg_state_cache_time *= rfs4_lease_time;
	rfs4_deleg_state_tab = rfs4_table_create(rfs4_server_state,
	    "DelegStateID",
	    rfs4_deleg_state_cache_time,
	    2,
	    rfs4_deleg_state_create,
	    rfs4_deleg_state_destroy,
	    rfs4_deleg_state_expiry,
	    sizeof (rfs4_deleg_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	rfs4_deleg_idx = rfs4_index_create(rfs4_deleg_state_tab,
	    "DelegByFileClient",
	    deleg_hash,
	    deleg_compare,
	    deleg_mkkey, TRUE);

	rfs4_deleg_state_idx = rfs4_index_create(rfs4_deleg_state_tab,
	    "DelegState",
	    deleg_state_hash,
	    deleg_state_compare,
	    deleg_state_mkkey, FALSE);

	/*
	 * Init the stable storage.
	 */
	rfs4_ss_init();

	/* make the clear-state entry point reachable through its hook */
	rfs4_client_clrst = rfs4_clear_client_state;

	mutex_exit(&rfs4_state_lock);
}
1410
1411
1412 /*
1413 * Used at server shutdown to cleanup all of the NFSv4 server's structures
1414 * and other state.
1415 */
1416 void
1417 rfs4_state_fini()
1418 {
1419 rfs4_database_t *dbp;
1420
1421 mutex_enter(&rfs4_state_lock);
1422
1423 if (rfs4_server_state == NULL) {
1424 mutex_exit(&rfs4_state_lock);
1425 return;
1426 }
1427
1428 rfs4_client_clrst = NULL;
1429
1430 rfs4_set_deleg_policy(SRV_NEVER_DELEGATE);
1431 dbp = rfs4_server_state;
1432 rfs4_server_state = NULL;
1433
1434 /*
1435 * Cleanup the CPR callback.
1436 */
1437 if (cpr_id)
1438 (void) callb_delete(cpr_id);
1439
1440 rw_destroy(&rfs4_findclient_lock);
1441
1442 /* First stop all of the reaper threads in the database */
1443 rfs4_database_shutdown(dbp);
1444 /* clean up any dangling stable storage structures */
1445 rfs4_ss_fini();
1446 /* Now actually destroy/release the database and its tables */
1447 rfs4_database_destroy(dbp);
1448
1449 /* Reset the cache timers for next time */
1450 rfs4_client_cache_time = 0;
1451 rfs4_openowner_cache_time = 0;
1452 rfs4_state_cache_time = 0;
1453 rfs4_lo_state_cache_time = 0;
1454 rfs4_lockowner_cache_time = 0;
1455 rfs4_file_cache_time = 0;
1456 rfs4_deleg_state_cache_time = 0;
1457
1458 mutex_exit(&rfs4_state_lock);
1459
1460 /* destroy server instances and current instance ptr */
1461 rfs4_servinst_destroy_all();
1462
1463 /* reset the "first NFSv4 request" status */
1464 rfs4_seen_first_compound = 0;
1465
1466 /* DSS: distributed stable storage */
1467 nvlist_free(rfs4_dss_oldpaths);
1468 nvlist_free(rfs4_dss_paths);
1469 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1470 }
1471
/*
 * Server-side view of a clientid4: the same storage seen either as the
 * opaque protocol value (id4) or as the pair of 32-bit fields the server
 * fills in.  rfs4_client_create() stores rfs4_start_time in start_time
 * and the database entry id in c_id.
 */
typedef union {
	struct {
		uint32_t start_time;
		uint32_t c_id;
	} impl_id;
	clientid4 id4;
} cid;
1479
/*
 * Cluster helpers defined later in the file.  foreign_clientid() is used
 * by rfs4_findclient_by_id() and embed_nodeid() by rfs4_client_create(),
 * in both cases only when cluster_bootflags has CLUSTER_BOOTED set.
 */
static int foreign_stateid(stateid_t *id);
static int foreign_clientid(cid *cidp);
static void embed_nodeid(cid *cidp);
1483
1484 typedef union {
1485 struct {
1486 uint32_t c_id;
1487 uint32_t gen_num;
1488 } cv_impl;
1489 verifier4 confirm_verf;
1564 * If the sysadmin has used clear_locks for this
1565 * entry then forced_expire will be set and we
1566 * want this entry to be reaped. Or the entry
1567 * has exceeded its lease period.
1568 */
1569 cp_expired = (cp->rc_forced_expire ||
1570 (gethrestime_sec() - cp->rc_last_access
1571 > rfs4_lease_time));
1572
1573 if (!cp->rc_ss_remove && cp_expired)
1574 cp->rc_ss_remove = 1;
1575 return (cp_expired);
1576 }
1577
1578 /*
1579 * Remove the leaf file from all distributed stable storage paths.
1580 */
1581 static void
1582 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1583 {
1584 rfs4_servinst_t *sip;
1585 char *leaf = cp->rc_ss_pn->leaf;
1586
1587 /*
1588 * since the state files are written to all DSS
1589 * paths we must remove this leaf file instance
1590 * from all server instances.
1591 */
1592
1593 mutex_enter(&rfs4_servinst_lock);
1594 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1595 /* remove the leaf file associated with this server instance */
1596 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1597 }
1598 mutex_exit(&rfs4_servinst_lock);
1599 }
1600
1601 static void
1602 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1603 {
1604 int i, npaths = sip->dss_npaths;
1605
1606 for (i = 0; i < npaths; i++) {
1607 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1608 char *path, *dir;
1609 size_t pathlen;
1610
1611 /* the HA-NFSv4 path might have been failed-over away from us */
1612 if (dss_path == NULL)
1613 continue;
1614
1615 dir = dss_path->path;
1616
1617 /* allow 3 extra bytes for two '/' & a NUL */
1618 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1646 if (cp->rc_ss_remove)
1647 rfs4_dss_remove_cpleaf(cp);
1648 rfs4_ss_pnfree(cp->rc_ss_pn);
1649 }
1650
1651 /* Free the client supplied client id */
1652 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1653
1654 if (cp->rc_sysidt != LM_NOSYSID)
1655 lm_free_sysidt(cp->rc_sysidt);
1656 }
1657
1658 static bool_t
1659 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1660 {
1661 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1662 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1663 struct sockaddr *ca;
1664 cid *cidp;
1665 scid_confirm_verf *scvp;
1666
1667 /* Get a clientid to give to the client */
1668 cidp = (cid *)&cp->rc_clientid;
1669 cidp->impl_id.start_time = rfs4_start_time;
1670 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1671
1672 /* If we are booted as a cluster node, embed our nodeid */
1673 if (cluster_bootflags & CLUSTER_BOOTED)
1674 embed_nodeid(cidp);
1675
1676 /* Allocate and copy client's client id value */
1677 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1678 cp->rc_nfs_client.id_len = client->id_len;
1679 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1680 cp->rc_nfs_client.verifier = client->verifier;
1681
1682 /* Copy client's IP address */
1683 ca = client->cl_addr;
1684 if (ca->sa_family == AF_INET)
1685 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1686 else if (ca->sa_family == AF_INET6)
1687 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1688 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1689
1707
1708 cp->rc_cr_set = NULL;
1709
1710 cp->rc_sysidt = LM_NOSYSID;
1711
1712 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1713 offsetof(rfs4_openowner_t, ro_node));
1714
1715 /* set up the callback control structure */
1716 cp->rc_cbinfo.cb_state = CB_UNINIT;
1717 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1718 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1719 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1720
1721 /*
1722 * Associate the client_t with the current server instance.
1723 * The hold is solely to satisfy the calling requirement of
1724 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1725 */
1726 rfs4_dbe_hold(cp->rc_dbe);
1727 rfs4_servinst_assign(cp, rfs4_cur_servinst);
1728 rfs4_dbe_rele(cp->rc_dbe);
1729
1730 return (TRUE);
1731 }
1732
1733 /*
1734 * Caller wants to generate/update the setclientid_confirm verifier
1735 * associated with a client. This is done during the SETCLIENTID
1736 * processing.
1737 */
1738 void
1739 rfs4_client_scv_next(rfs4_client_t *cp)
1740 {
1741 scid_confirm_verf *scvp;
1742
1743 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1744 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1745 scvp->cv_impl.gen_num++;
1746 }
1747
1748 void
1749 rfs4_client_rele(rfs4_client_t *cp)
1750 {
1751 rfs4_dbe_rele(cp->rc_dbe);
1752 }
1753
1754 rfs4_client_t *
1755 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1756 {
1757 rfs4_client_t *cp;
1758
1759
1760 if (oldcp) {
1761 rw_enter(&rfs4_findclient_lock, RW_WRITER);
1762 rfs4_dbe_hide(oldcp->rc_dbe);
1763 } else {
1764 rw_enter(&rfs4_findclient_lock, RW_READER);
1765 }
1766
1767 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_nfsclnt_idx, client,
1768 create, (void *)client, RFS4_DBS_VALID);
1769
1770 if (oldcp)
1771 rfs4_dbe_unhide(oldcp->rc_dbe);
1772
1773 rw_exit(&rfs4_findclient_lock);
1774
1775 return (cp);
1776 }
1777
1778 rfs4_client_t *
1779 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1780 {
1781 rfs4_client_t *cp;
1782 bool_t create = FALSE;
1783 cid *cidp = (cid *)&clientid;
1784
1785 /* If we're a cluster and the nodeid isn't right, short-circuit */
1786 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1787 return (NULL);
1788
1789 rw_enter(&rfs4_findclient_lock, RW_READER);
1790
1791 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, &clientid,
1792 &create, NULL, RFS4_DBS_VALID);
1793
1794 rw_exit(&rfs4_findclient_lock);
1795
1796 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1797 rfs4_client_rele(cp);
1798 return (NULL);
1799 } else {
1800 return (cp);
1801 }
1802 }
1803
1804 static uint32_t
1805 clntip_hash(void *key)
1806 {
1807 struct sockaddr *addr = key;
1808 int i, len = 0;
1809 uint32_t hash = 0;
1810 char *ptr;
1811
1812 if (addr->sa_family == AF_INET) {
1813 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1814 len = sizeof (struct in_addr);
1882 {
1883 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1884 struct sockaddr *ca = (struct sockaddr *)arg;
1885
1886 /* Copy client's IP address */
1887 if (ca->sa_family == AF_INET)
1888 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1889 else if (ca->sa_family == AF_INET6)
1890 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1891 else
1892 return (FALSE);
1893 cp->ri_no_referrals = 1;
1894
1895 return (TRUE);
1896 }
1897
1898 rfs4_clntip_t *
1899 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1900 {
1901 rfs4_clntip_t *cp;
1902
1903 rw_enter(&rfs4_findclient_lock, RW_READER);
1904
1905 cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
1906 create, addr, RFS4_DBS_VALID);
1907
1908 rw_exit(&rfs4_findclient_lock);
1909
1910 return (cp);
1911 }
1912
1913 void
1914 rfs4_invalidate_clntip(struct sockaddr *addr)
1915 {
1916 rfs4_clntip_t *cp;
1917 bool_t create = FALSE;
1918
1919 rw_enter(&rfs4_findclient_lock, RW_READER);
1920
1921 cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
1922 &create, NULL, RFS4_DBS_VALID);
1923 if (cp == NULL) {
1924 rw_exit(&rfs4_findclient_lock);
1925 return;
1926 }
1927 rfs4_dbe_invalidate(cp->ri_dbe);
1928 rfs4_dbe_rele(cp->ri_dbe);
1929
1930 rw_exit(&rfs4_findclient_lock);
1931 }
1932
1933 bool_t
1934 rfs4_lease_expired(rfs4_client_t *cp)
1935 {
1936 bool_t rc;
1937
1938 rfs4_dbe_lock(cp->rc_dbe);
1939
1940 /*
1941 * If the admin has executed clear_locks for this
1942 * client id, force expire will be set, so no need
1943 * to calculate anything because it's "outa here".
1944 */
1945 if (cp->rc_forced_expire) {
1946 rc = TRUE;
1947 } else {
1948 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
1949 }
1950
2058
2059 /* Free the lock owner id */
2060 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2061 }
2062
2063 void
2064 rfs4_openowner_rele(rfs4_openowner_t *oo)
2065 {
2066 rfs4_dbe_rele(oo->ro_dbe);
2067 }
2068
2069 static bool_t
2070 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2071 {
2072 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2073 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2074 open_owner4 *openowner = &argp->ro_owner;
2075 seqid4 seqid = argp->ro_open_seqid;
2076 rfs4_client_t *cp;
2077 bool_t create = FALSE;
2078
2079 rw_enter(&rfs4_findclient_lock, RW_READER);
2080
2081 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2082 &openowner->clientid,
2083 &create, NULL, RFS4_DBS_VALID);
2084
2085 rw_exit(&rfs4_findclient_lock);
2086
2087 if (cp == NULL)
2088 return (FALSE);
2089
2090 oo->ro_reply_fh.nfs_fh4_len = 0;
2091 oo->ro_reply_fh.nfs_fh4_val = NULL;
2092
2093 oo->ro_owner.clientid = openowner->clientid;
2094 oo->ro_owner.owner_val =
2095 kmem_alloc(openowner->owner_len, KM_SLEEP);
2096
2097 bcopy(openowner->owner_val,
2098 oo->ro_owner.owner_val, openowner->owner_len);
2099
2100 oo->ro_owner.owner_len = openowner->owner_len;
2101
2102 oo->ro_need_confirm = TRUE;
2103
2104 rfs4_sw_init(&oo->ro_sw);
2105
2107 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2108 oo->ro_client = cp;
2109 oo->ro_cr_set = NULL;
2110
2111 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2112 offsetof(rfs4_state_t, rs_node));
2113
2114 /* Insert openowner into client's open owner list */
2115 rfs4_dbe_lock(cp->rc_dbe);
2116 list_insert_tail(&cp->rc_openownerlist, oo);
2117 rfs4_dbe_unlock(cp->rc_dbe);
2118
2119 return (TRUE);
2120 }
2121
2122 rfs4_openowner_t *
2123 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2124 {
2125 rfs4_openowner_t *oo;
2126 rfs4_openowner_t arg;
2127
2128 arg.ro_owner = *openowner;
2129 arg.ro_open_seqid = seqid;
2130 oo = (rfs4_openowner_t *)rfs4_dbsearch(rfs4_openowner_idx, openowner,
2131 create, &arg, RFS4_DBS_VALID);
2132
2133 return (oo);
2134 }
2135
2136 void
2137 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2138 {
2139
2140 rfs4_dbe_lock(oo->ro_dbe);
2141
2142 oo->ro_open_seqid++;
2143
2144 rfs4_dbe_unlock(oo->ro_dbe);
2145 }
2146
2147 void
2148 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2149 {
2150
2253 }
2254
2255 /* ARGSUSED */
2256 static bool_t
2257 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2258 {
2259 /*
2260 * Since expiry is called with no other references on
2261 * this struct, go ahead and have it removed.
2262 */
2263 return (TRUE);
2264 }
2265
2266 static bool_t
2267 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2268 {
2269 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2270 lock_owner4 *lockowner = (lock_owner4 *)arg;
2271 rfs4_client_t *cp;
2272 bool_t create = FALSE;
2273
2274 rw_enter(&rfs4_findclient_lock, RW_READER);
2275
2276 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2277 &lockowner->clientid,
2278 &create, NULL, RFS4_DBS_VALID);
2279
2280 rw_exit(&rfs4_findclient_lock);
2281
2282 if (cp == NULL)
2283 return (FALSE);
2284
2285 /* Reference client */
2286 lo->rl_client = cp;
2287 lo->rl_owner.clientid = lockowner->clientid;
2288 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2289 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2290 lockowner->owner_len);
2291 lo->rl_owner.owner_len = lockowner->owner_len;
2292 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2293
2294 return (TRUE);
2295 }
2296
2297 rfs4_lockowner_t *
2298 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2299 {
2300 rfs4_lockowner_t *lo;
2301
2302 lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_idx, lockowner,
2303 create, lockowner, RFS4_DBS_VALID);
2304
2305 return (lo);
2306 }
2307
2308 rfs4_lockowner_t *
2309 rfs4_findlockowner_by_pid(pid_t pid)
2310 {
2311 rfs4_lockowner_t *lo;
2312 bool_t create = FALSE;
2313
2314 lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_pid_idx,
2315 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2316
2317 return (lo);
2318 }
2319
2320
2321 static uint32_t
2322 file_hash(void *key)
2323 {
2324 return (ADDRHASH(key));
2325 }
2326
2327 static void *
2328 file_mkkey(rfs4_entry_t u_entry)
2329 {
2330 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2331
2332 return (fp->rf_vp);
2333 }
2334
2405
2406 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2407 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2408
2409 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2410
2411 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2412
2413 mutex_enter(&vp->v_vsd_lock);
2414 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2415 mutex_exit(&vp->v_vsd_lock);
2416
2417 return (TRUE);
2418 }
2419
2420 rfs4_file_t *
2421 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2422 {
2423 rfs4_file_t *fp;
2424 rfs4_fcreate_arg arg;
2425
2426 arg.vp = vp;
2427 arg.fh = fh;
2428
2429 if (*create == TRUE)
2430 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
2431 &arg, RFS4_DBS_VALID);
2432 else {
2433 mutex_enter(&vp->v_vsd_lock);
2434 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2435 if (fp) {
2436 rfs4_dbe_lock(fp->rf_dbe);
2437 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2438 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2439 rfs4_dbe_unlock(fp->rf_dbe);
2440 fp = NULL;
2441 } else {
2442 rfs4_dbe_hold(fp->rf_dbe);
2443 rfs4_dbe_unlock(fp->rf_dbe);
2444 }
2445 }
2446 mutex_exit(&vp->v_vsd_lock);
2447 }
2448 return (fp);
2449 }
2450
2451 /*
2452 * Find a file in the db and once it is located, take the rw lock.
2453 * Need to check the vnode pointer and if it does not exist (it was
2454 * removed between the db location and check) redo the find. This
2455 * assumes that a file struct that has a NULL vnode pointer is marked
2456 * at 'invalid' and will not be found in the db the second time
2457 * around.
2458 */
2459 rfs4_file_t *
2460 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2461 {
2462 rfs4_file_t *fp;
2463 rfs4_fcreate_arg arg;
2464 bool_t screate = *create;
2465
2466 if (screate == FALSE) {
2467 mutex_enter(&vp->v_vsd_lock);
2468 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2469 if (fp) {
2470 rfs4_dbe_lock(fp->rf_dbe);
2471 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2472 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2473 rfs4_dbe_unlock(fp->rf_dbe);
2474 mutex_exit(&vp->v_vsd_lock);
2475 fp = NULL;
2476 } else {
2477 rfs4_dbe_hold(fp->rf_dbe);
2478 rfs4_dbe_unlock(fp->rf_dbe);
2479 mutex_exit(&vp->v_vsd_lock);
2480 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2481 if (fp->rf_vp == NULL) {
2482 rw_exit(&fp->rf_file_rwlock);
2483 rfs4_file_rele(fp);
2484 fp = NULL;
2485 }
2486 }
2487 } else {
2488 mutex_exit(&vp->v_vsd_lock);
2489 }
2490 } else {
2491 retry:
2492 arg.vp = vp;
2493 arg.fh = fh;
2494
2495 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
2496 &arg, RFS4_DBS_VALID);
2497 if (fp != NULL) {
2498 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2499 if (fp->rf_vp == NULL) {
2500 rw_exit(&fp->rf_file_rwlock);
2501 rfs4_file_rele(fp);
2502 *create = screate;
2503 goto retry;
2504 }
2505 }
2506 }
2507
2508 return (fp);
2509 }
2510
2511 static uint32_t
2512 lo_state_hash(void *key)
2513 {
2514 stateid_t *id = key;
2515
2516 return (id->bits.ident+id->bits.pid);
2631 list_insert_tail(&sp->rs_lostatelist, lsp);
2632 rfs4_dbe_hold(sp->rs_dbe);
2633 rfs4_dbe_unlock(sp->rs_dbe);
2634
2635 return (TRUE);
2636 }
2637
2638 void
2639 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2640 {
2641 if (unlock_fp == TRUE)
2642 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2643 rfs4_dbe_rele(lsp->rls_dbe);
2644 }
2645
2646 static rfs4_lo_state_t *
2647 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2648 {
2649 rfs4_lo_state_t *lsp;
2650 bool_t create = FALSE;
2651
2652 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_idx, id,
2653 &create, NULL, RFS4_DBS_VALID);
2654 if (lock_fp == TRUE && lsp != NULL)
2655 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2656
2657 return (lsp);
2658 }
2659
2660
2661 static uint32_t
2662 lo_state_lo_hash(void *key)
2663 {
2664 rfs4_lo_state_t *lsp = key;
2665
2666 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2667 }
2668
2669 static bool_t
2670 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2671 {
2672 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2673 rfs4_lo_state_t *keyp = key;
2674
2675 return (keyp->rls_locker == lsp->rls_locker &&
2676 keyp->rls_state == lsp->rls_state);
2677 }
2678
2679 static void *
2680 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2681 {
2682 return (u_entry);
2683 }
2684
2685 rfs4_lo_state_t *
2686 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2687 bool_t *create)
2688 {
2689 rfs4_lo_state_t *lsp;
2690 rfs4_lo_state_t arg;
2691
2692 arg.rls_locker = lo;
2693 arg.rls_state = sp;
2694
2695 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_owner_idx, &arg,
2696 create, &arg, RFS4_DBS_VALID);
2697
2698 return (lsp);
2699 }
2700
2701 static stateid_t
2702 get_stateid(id_t eid)
2703 {
2704 stateid_t id;
2705
2706 id.bits.boottime = rfs4_start_time;
2707 id.bits.ident = eid;
2708 id.bits.chgseq = 0;
2709 id.bits.type = 0;
2710 id.bits.pid = 0;
2711
2712 /*
2713 * If we are booted as a cluster node, embed our nodeid.
2714 * We've already done sanity checks in rfs4_client_create() so no
2715 * need to repeat them here.
2716 */
2717 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2718 clconf_get_nodeid() : 0;
2719
2720 return (id);
2721 }
2722
2723 /*
2724 * For use only when booted as a cluster node.
2725 * Returns TRUE if the embedded nodeid indicates that this stateid was
2726 * generated on another node.
2942 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
2943 {
2944 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2945
2946 /* return delegation if necessary */
2947 rfs4_return_deleg(dsp, FALSE);
2948
2949 /* We're done with the file */
2950 rfs4_file_rele(dsp->rds_finfo);
2951 dsp->rds_finfo = NULL;
2952
2953 /* And now with the client */
2954 rfs4_client_rele(dsp->rds_client);
2955 dsp->rds_client = NULL;
2956 }
2957
2958 rfs4_deleg_state_t *
2959 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
2960 {
2961 rfs4_deleg_state_t ds, *dsp;
2962
2963 ds.rds_client = sp->rs_owner->ro_client;
2964 ds.rds_finfo = sp->rs_finfo;
2965
2966 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_idx, &ds,
2967 create, &ds, RFS4_DBS_VALID);
2968
2969 return (dsp);
2970 }
2971
2972 rfs4_deleg_state_t *
2973 rfs4_finddelegstate(stateid_t *id)
2974 {
2975 rfs4_deleg_state_t *dsp;
2976 bool_t create = FALSE;
2977
2978 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_state_idx, id,
2979 &create, NULL, RFS4_DBS_VALID);
2980
2981 return (dsp);
2982 }
2983
2984 void
2985 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
2986 {
2987 rfs4_dbe_rele(dsp->rds_dbe);
2988 }
2989
2990 void
2991 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
2992 {
2993
2994 rfs4_dbe_lock(lsp->rls_dbe);
2995
2996 /*
2997 * If we are skipping sequence id checking, this means that
2998 * this is the first lock request and therefore the sequence
2999 * id does not need to be updated. This only happens on the
3078 if (sp->rs_closed == TRUE)
3079 return (FALSE);
3080
3081 return (fp == sp->rs_finfo);
3082 }
3083
3084 static void *
3085 state_file_mkkey(rfs4_entry_t u_entry)
3086 {
3087 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3088
3089 return (sp->rs_finfo);
3090 }
3091
3092 rfs4_state_t *
3093 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3094 bool_t *create)
3095 {
3096 rfs4_state_t *sp;
3097 rfs4_state_t key;
3098
3099 key.rs_owner = oo;
3100 key.rs_finfo = fp;
3101
3102 sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_owner_file_idx, &key,
3103 create, &key, RFS4_DBS_VALID);
3104
3105 return (sp);
3106 }
3107
3108 /* This returns ANY state struct that refers to this file */
3109 static rfs4_state_t *
3110 rfs4_findstate_by_file(rfs4_file_t *fp)
3111 {
3112 bool_t create = FALSE;
3113
3114 return ((rfs4_state_t *)rfs4_dbsearch(rfs4_state_file_idx, fp,
3115 &create, fp, RFS4_DBS_VALID));
3116 }
3117
3118 static bool_t
3119 rfs4_state_expiry(rfs4_entry_t u_entry)
3120 {
3121 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3122
3123 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3124 return (TRUE);
3125
3126 if (sp->rs_closed == TRUE &&
3127 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3128 > rfs4_lease_time))
3129 return (TRUE);
3130
3131 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3132 > rfs4_lease_time));
3133 }
3134
3145 sp->rs_stateid.bits.type = OPENID;
3146 sp->rs_owner = oo;
3147 sp->rs_finfo = fp;
3148
3149 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3150 offsetof(rfs4_lo_state_t, rls_node));
3151
3152 /* Insert state on per open owner's list */
3153 rfs4_dbe_lock(oo->ro_dbe);
3154 list_insert_tail(&oo->ro_statelist, sp);
3155 rfs4_dbe_unlock(oo->ro_dbe);
3156
3157 return (TRUE);
3158 }
3159
3160 static rfs4_state_t *
3161 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3162 {
3163 rfs4_state_t *sp;
3164 bool_t create = FALSE;
3165
3166 sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_idx, id,
3167 &create, NULL, find_invalid);
3168 if (lock_fp == TRUE && sp != NULL)
3169 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3170
3171 return (sp);
3172 }
3173
3174 void
3175 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3176 cred_t *cr)
3177 {
3178 /* Remove the associated lo_state owners */
3179 if (!lock_held)
3180 rfs4_dbe_lock(sp->rs_dbe);
3181
3182 /*
3183 * If refcnt == 0, the dbe is about to be destroyed.
3184 * lock state will be released by the reaper thread.
3185 */
3186
3214 }
3215
3216 void
3217 rfs4_client_close(rfs4_client_t *cp)
3218 {
3219 /* Mark client as going away. */
3220 rfs4_dbe_lock(cp->rc_dbe);
3221 rfs4_dbe_invalidate(cp->rc_dbe);
3222 rfs4_dbe_unlock(cp->rc_dbe);
3223
3224 rfs4_client_state_remove(cp);
3225
3226 /* Release the client */
3227 rfs4_client_rele(cp);
3228 }
3229
3230 nfsstat4
3231 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3232 {
3233 cid *cidp = (cid *) cp;
3234
3235 /*
3236 * If we are booted as a cluster node, check the embedded nodeid.
3237 * If it indicates that this clientid was generated on another node,
3238 * inform the client accordingly.
3239 */
3240 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3241 return (NFS4ERR_STALE_CLIENTID);
3242
3243 /*
3244 * If the server start time matches the time provided
3245 * by the client (via the clientid) and this is NOT a
3246 * setclientid_confirm then return EXPIRED.
3247 */
3248 if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time)
3249 return (NFS4ERR_EXPIRED);
3250
3251 return (NFS4ERR_STALE_CLIENTID);
3252 }
3253
3254 /*
3255 * This is used when a stateid has not been found amongst the
3256 * current server's state. Check the stateid to see if it
3257 * was from this server instantiation or not.
3258 */
3259 static nfsstat4
3260 what_stateid_error(stateid_t *id, stateid_type_t type)
3261 {
3262 /* If we are booted as a cluster node, was stateid locally generated? */
3263 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3264 return (NFS4ERR_STALE_STATEID);
3265
3266 /* If types don't match then no use checking further */
3267 if (type != id->bits.type)
3268 return (NFS4ERR_BAD_STATEID);
3269
3270 /* From a different server instantiation, return STALE */
3271 if (id->bits.boottime != rfs4_start_time)
3272 return (NFS4ERR_STALE_STATEID);
3273
3274 /*
3275 * From this server but the state is most likely beyond lease
3276 * timeout: return NFS4ERR_EXPIRED. However, there is the
3277 * case of a delegation stateid. For delegations, there is a
3278 * case where the state can be removed without the client's
3279 * knowledge/consent: revocation. In the case of delegation
3280 * revocation, the delegation state will be removed and will
3281 * not be found. If the client does something like a
3282 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3283 * that has been revoked, the server should return BAD_STATEID
3284 * instead of the more common EXPIRED error.
3285 */
3286 if (id->bits.boottime == rfs4_start_time) {
3287 if (type == DELEGID)
3288 return (NFS4ERR_BAD_STATEID);
3289 else
3290 return (NFS4ERR_EXPIRED);
3291 }
3292
3293 return (NFS4ERR_BAD_STATEID);
3294 }
3295
3296 /*
3297 * Used later on to find the various state structs. When called from
3298 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3299 * taken (it is not needed) and helps on the read/write path with
3300 * respect to performance.
3301 */
3302 static nfsstat4
3303 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3304 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3305 {
3306 stateid_t *id = (stateid_t *)stateid;
3768
3769 /*
3770 * This is a special function in that for the file struct provided the
3771 * server wants to remove/close all current state associated with the
3772 * file. The prime use of this would be with OP_REMOVE to force the
3773 * release of state and particularly of file locks.
3774 *
3775 * There is an assumption that there is no delegations outstanding on
3776 * this file at this point. The caller should have waited for those
3777 * to be returned or revoked.
3778 */
3779 void
3780 rfs4_close_all_state(rfs4_file_t *fp)
3781 {
3782 rfs4_state_t *sp;
3783
3784 rfs4_dbe_lock(fp->rf_dbe);
3785
3786 #ifdef DEBUG
3787 /* only applies when server is handing out delegations */
3788 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE)
3789 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3790 #endif
3791
3792 /* No delegations for this file */
3793 ASSERT(list_is_empty(&fp->rf_delegstatelist));
3794
3795 /* Make sure that it can not be found */
3796 rfs4_dbe_invalidate(fp->rf_dbe);
3797
3798 if (fp->rf_vp == NULL) {
3799 rfs4_dbe_unlock(fp->rf_dbe);
3800 return;
3801 }
3802 rfs4_dbe_unlock(fp->rf_dbe);
3803
3804 /*
3805 * Hold as writer to prevent other server threads from
3806 * processing requests related to the file while all state is
3807 * being removed.
3808 */
3978 }
3979 mutex_enter(&vp->v_vsd_lock);
3980 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
3981 mutex_exit(&vp->v_vsd_lock);
3982 VN_RELE(vp);
3983 fp->rf_vp = NULL;
3984 }
3985 rfs4_dbe_invalidate(fp->rf_dbe);
3986 }
3987 }
3988
3989 /*
3990 * Given a directory that is being unexported, cleanup/release all
3991 * state in the server that refers to objects residing underneath this
3992 * particular export. The ordering of the release is important.
3993 * Lock_owner, then state and then file.
3994 */
3995 void
3996 rfs4_clean_state_exi(struct exportinfo *exi)
3997 {
3998 mutex_enter(&rfs4_state_lock);
3999
4000 if (rfs4_server_state == NULL) {
4001 mutex_exit(&rfs4_state_lock);
4002 return;
4003 }
4004
4005 rfs4_dbe_walk(rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4006 rfs4_dbe_walk(rfs4_state_tab, rfs4_state_walk_callout, exi);
4007 rfs4_dbe_walk(rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4008 rfs4_dbe_walk(rfs4_file_tab, rfs4_file_walk_callout, exi);
4009
4010 mutex_exit(&rfs4_state_lock);
4011 }
|
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2018 Nexenta Systems, Inc.
28 */
29
30 #include <sys/systm.h>
31 #include <sys/kmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/atomic.h>
34 #include <sys/clconf.h>
35 #include <sys/cladm.h>
36 #include <sys/flock.h>
37 #include <nfs/export.h>
38 #include <nfs/nfs.h>
39 #include <nfs/nfs4.h>
40 #include <nfs/nfssys.h>
41 #include <nfs/lm.h>
42 #include <sys/pathname.h>
43 #include <sys/sdt.h>
44 #include <sys/nvpair.h>
45
46 extern u_longlong_t nfs4_srv_caller_id;
47
48 extern uint_t nfs4_srv_vkey;
49
50 stateid4 special0 = {
51 0,
52 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
53 };
54
55 stateid4 special1 = {
56 0xffffffff,
57 {
58 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
59 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
60 (char)0xff, (char)0xff, (char)0xff, (char)0xff
61 }
62 };
63
64
65 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \
66 stateid4_cmp(id, &special1))
67
68 /* For embedding the cluster nodeid into our clientid */
69 #define CLUSTER_NODEID_SHIFT 24
70 #define CLUSTER_MAX_NODEID 255
71
72 #ifdef DEBUG
73 int rfs4_debug;
74 #endif
75
76 static uint32_t rfs4_database_debug = 0x00;
77
78 /* CSTYLED */
79 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
80 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
81 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
82 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
83
84 /*
85 * Couple of simple init/destroy functions for a general waiter
86 */
87 void
88 rfs4_sw_init(rfs4_state_wait_t *swp)
89 {
90 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
91 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
92 swp->sw_active = FALSE;
93 swp->sw_wait_count = 0;
94 }
95
96 void
97 rfs4_sw_destroy(rfs4_state_wait_t *swp)
98 {
99 mutex_destroy(swp->sw_cv_lock);
108 swp->sw_wait_count++;
109 cv_wait(swp->sw_cv, swp->sw_cv_lock);
110 swp->sw_wait_count--;
111 }
112 ASSERT(swp->sw_active == FALSE);
113 swp->sw_active = TRUE;
114 mutex_exit(swp->sw_cv_lock);
115 }
116
117 void
118 rfs4_sw_exit(rfs4_state_wait_t *swp)
119 {
120 mutex_enter(swp->sw_cv_lock);
121 ASSERT(swp->sw_active == TRUE);
122 swp->sw_active = FALSE;
123 if (swp->sw_wait_count != 0)
124 cv_broadcast(swp->sw_cv);
125 mutex_exit(swp->sw_cv_lock);
126 }
127
128 static void
129 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
130 {
131 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
132 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
133
134 if (sres->status == NFS4ERR_DENIED) {
135 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
136 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
137 }
138 }
139
140 /*
141 * CPR callback id -- not related to v4 callbacks
142 */
143 static callb_id_t cpr_id = 0;
144
145 static void
146 deep_lock_free(LOCK4res *res)
147 {
148 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
149
150 if (res->status == NFS4ERR_DENIED)
151 kmem_free(lo->owner_val, lo->owner_len);
152 }
153
154 static void
155 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
156 {
157 nfsace4 *sacep, *dacep;
158
159 if (sres->status != NFS4_OK) {
160 return;
161 }
162
163 dres->attrset = sres->attrset;
164
260 /*
261 * This code is somewhat prototypical for now. Its purpose currently is to
262 * implement the interfaces sufficiently to finish the higher protocol
263 * elements. This will be replaced by dynamically resizable tables
264 * backed by kmem_cache allocator. However synchronization is handled
265 * correctly (I hope) and will not change by much. The mutexes for
266 * the hash buckets that can be used to create new instances of data
267 * structures might be good candidates to evolve into reader writer
268 * locks. If it has to do a creation, it would be holding the
269 * mutex across a kmem_alloc with KM_SLEEP specified.
270 */
271
272 #ifdef DEBUG
273 #define TABSIZE 17
274 #else
275 #define TABSIZE 2047
276 #endif
277
278 #define ADDRHASH(key) ((unsigned long)(key) >> 3)
279
/* Parenthesized so the macro expands as one value inside expressions */
#define	MAXTABSZ	(1024 * 1024)
281
282 /* The values below are rfs4_lease_time units */
283
284 #ifdef DEBUG
285 #define CLIENT_CACHE_TIME 1
286 #define OPENOWNER_CACHE_TIME 1
287 #define STATE_CACHE_TIME 1
288 #define LO_STATE_CACHE_TIME 1
289 #define LOCKOWNER_CACHE_TIME 1
290 #define FILE_CACHE_TIME 3
291 #define DELEG_STATE_CACHE_TIME 1
292 #else
293 #define CLIENT_CACHE_TIME 10
294 #define OPENOWNER_CACHE_TIME 5
295 #define STATE_CACHE_TIME 1
296 #define LO_STATE_CACHE_TIME 1
297 #define LOCKOWNER_CACHE_TIME 3
298 #define FILE_CACHE_TIME 40
299 #define DELEG_STATE_CACHE_TIME 1
300 #endif
301
302 /*
303 * NFSv4 server state databases
304 *
305 * Initialized when the module is loaded and used by NFSv4 state tables.
306 * These kmem_cache databases are global, the tables that make use of these
307 * are per zone.
308 */
309 kmem_cache_t *rfs4_client_mem_cache;
310 kmem_cache_t *rfs4_clntIP_mem_cache;
311 kmem_cache_t *rfs4_openown_mem_cache;
312 kmem_cache_t *rfs4_openstID_mem_cache;
313 kmem_cache_t *rfs4_lockstID_mem_cache;
314 kmem_cache_t *rfs4_lockown_mem_cache;
315 kmem_cache_t *rfs4_file_mem_cache;
316 kmem_cache_t *rfs4_delegstID_mem_cache;
317
318 /*
319 * NFSv4 state table functions
320 */
321 static bool_t rfs4_client_create(rfs4_entry_t, void *);
322 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
323 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
324 static void rfs4_client_destroy(rfs4_entry_t);
325 static bool_t rfs4_client_expiry(rfs4_entry_t);
326 static uint32_t clientid_hash(void *);
327 static bool_t clientid_compare(rfs4_entry_t, void *);
328 static void *clientid_mkkey(rfs4_entry_t);
329 static uint32_t nfsclnt_hash(void *);
330 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
331 static void *nfsclnt_mkkey(rfs4_entry_t);
332 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
333 static void rfs4_clntip_destroy(rfs4_entry_t);
334 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
335 static uint32_t clntip_hash(void *);
336 static bool_t clntip_compare(rfs4_entry_t, void *);
337 static void *clntip_mkkey(rfs4_entry_t);
338 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
339 static void rfs4_openowner_destroy(rfs4_entry_t);
340 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
666 cl_ss->ss_pn = rfs4_ss_movestate(
667 statedir, destdir, dep->d_name);
668 } else {
669 cl_ss->ss_pn = ss_pn;
670 }
671 insque(cl_ss, oldstate);
672 } else {
673 rfs4_ss_pnfree(ss_pn);
674 }
675 }
676 }
677
678 out:
679 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
680 VN_RELE(dvp);
681 if (dirt)
682 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
683 }
684
685 static void
686 rfs4_ss_init(nfs4_srv_t *nsrv4)
687 {
688 int npaths = 1;
689 char *default_dss_path = NFS4_DSS_VAR_DIR;
690
691 /* read the default stable storage state */
692 rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
693
694 rfs4_ss_enabled = 1;
695 }
696
697 static void
698 rfs4_ss_fini(nfs4_srv_t *nsrv4)
699 {
700 rfs4_servinst_t *sip;
701
702 mutex_enter(&nsrv4->servinst_lock);
703 sip = nsrv4->nfs4_cur_servinst;
704 while (sip != NULL) {
705 rfs4_dss_clear_oldstate(sip);
706 sip = sip->next;
707 }
708 mutex_exit(&nsrv4->servinst_lock);
709 }
710
711 /*
712 * Remove all oldstate files referenced by this servinst.
713 */
714 static void
715 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
716 {
717 rfs4_oldstate_t *os_head, *osp;
718
719 rw_enter(&sip->oldstate_lock, RW_WRITER);
720 os_head = sip->oldstate;
721
722 if (os_head == NULL) {
723 rw_exit(&sip->oldstate_lock);
724 return;
725 }
726
727 /* skip dummy entry */
728 osp = os_head->next;
732
733 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
734
735 if (osp->cl_id4.id_val)
736 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
737 rfs4_ss_pnfree(osp->ss_pn);
738
739 os_next = osp->next;
740 remque(osp);
741 kmem_free(osp, sizeof (rfs4_oldstate_t));
742 osp = os_next;
743 }
744
745 rw_exit(&sip->oldstate_lock);
746 }
747
748 /*
749 * Form the state and oldstate paths, and read in the stable storage files.
750 */
751 void
752 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
753 {
754 int i;
755 char *state, *oldstate;
756
757 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
758 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
759
760 for (i = 0; i < npaths; i++) {
761 char *path = paths[i];
762
763 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
764 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
765
766 /*
767 * Populate the current server instance's oldstate list.
768 *
769 * 1. Read stable storage data from old state directory,
770 * leaving its contents alone.
771 *
772 * 2. Read stable storage data from state directory,
773 * and move the latter's contents to old state
774 * directory.
775 */
776 /* CSTYLED */
777 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
778 /* CSTYLED */
779 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
780 }
781
782 kmem_free(state, MAXPATHLEN);
783 kmem_free(oldstate, MAXPATHLEN);
784 }
785
786
787 /*
788 * Check if we are still in grace and if the client can be
789 * granted permission to perform reclaims.
790 */
791 void
792 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
793 {
794 rfs4_servinst_t *sip;
795
796 /*
797 * It should be sufficient to check the oldstate data for just
798 * this client's instance. However, since our per-instance
799 * client grouping is solely temporal, HA-NFSv4 RG failover
800 * might result in clients of the same RG being partitioned into
801 * separate instances.
802 *
803 * Until the client grouping is improved, we must check the
804 * oldstate data for all instances with an active grace period.
805 *
806 * This also serves as the mechanism to remove stale oldstate data.
807 * The first time we check an instance after its grace period has
808 * expired, the oldstate data should be cleared.
809 *
810 * Start at the current instance, and walk the list backwards
811 * to the first.
812 */
813 mutex_enter(&nsrv4->servinst_lock);
814 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
815 rfs4_ss_chkclid_sip(cp, sip);
816
817 /* if the above check found this client, we're done */
818 if (cp->rc_can_reclaim)
819 break;
820 }
821 mutex_exit(&nsrv4->servinst_lock);
822 }
823
824 static void
825 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
826 {
827 rfs4_oldstate_t *osp, *os_head;
828
829 /* short circuit everything if this server instance has no oldstate */
830 rw_enter(&sip->oldstate_lock, RW_READER);
831 os_head = sip->oldstate;
832 rw_exit(&sip->oldstate_lock);
833 if (os_head == NULL)
834 return;
835
836 /*
837 * If this server instance is no longer in a grace period then
838 * the client won't be able to reclaim. No further need for this
839 * instance's oldstate data, so it can be cleared.
840 */
841 if (!rfs4_servinst_in_grace(sip))
851 while (osp != os_head) {
852 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
853 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
854 osp->cl_id4.id_len) == 0) {
855 cp->rc_can_reclaim = 1;
856 break;
857 }
858 }
859 osp = osp->next;
860 }
861
862 rw_exit(&sip->oldstate_lock);
863 }
864
865 /*
866 * Place client information into stable storage: 1/3.
867 * First, generate the leaf filename, from the client's IP address and
868 * the server-generated short-hand clientid.
869 */
870 void
871 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
872 {
873 const char *kinet_ntop6(uchar_t *, char *, size_t);
874 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
875 struct sockaddr *ca;
876 uchar_t *b;
877
878 if (rfs4_ss_enabled == 0) {
879 return;
880 }
881
882 buf[0] = 0;
883
884 ca = (struct sockaddr *)&cp->rc_addr;
885
886 /*
887 * Convert the caller's IP address to a dotted string
888 */
889 if (ca->sa_family == AF_INET) {
890 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
891 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
892 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
893 } else if (ca->sa_family == AF_INET6) {
894 struct sockaddr_in6 *sin6;
895
896 sin6 = (struct sockaddr_in6 *)ca;
897 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
898 buf, INET6_ADDRSTRLEN);
899 }
900
901 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
902 (longlong_t)cp->rc_clientid);
903 rfs4_ss_clid_write(nsrv4, cp, leaf);
904 }
905
906 /*
907 * Place client information into stable storage: 2/3.
908 * DSS: distributed stable storage: the file may need to be written to
909 * multiple directories.
910 */
911 static void
912 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
913 {
914 rfs4_servinst_t *sip;
915
916 /*
917 * It should be sufficient to write the leaf file to (all) DSS paths
918 * associated with just this client's instance. However, since our
919 * per-instance client grouping is solely temporal, HA-NFSv4 RG
920 * failover might result in us losing DSS data.
921 *
922 * Until the client grouping is improved, we must write the DSS data
923 * to all instances' paths. Start at the current instance, and
924 * walk the list backwards to the first.
925 */
926 mutex_enter(&nsrv4->servinst_lock);
927 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
928 int i, npaths = sip->dss_npaths;
929
930 /* write the leaf file to all DSS paths */
931 for (i = 0; i < npaths; i++) {
932 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
933
934 /* HA-NFSv4 path might have been failed-away from us */
935 if (dss_path == NULL)
936 continue;
937
938 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
939 }
940 }
941 mutex_exit(&nsrv4->servinst_lock);
942 }
943
944 /*
945 * Place client information into stable storage: 3/3.
946 * Write the stable storage data to the requested file.
947 */
948 static void
949 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
950 {
951 int ioflag;
952 int file_vers = NFS4_SS_VERSION;
953 size_t dirlen;
954 struct uio uio;
955 struct iovec iov[4];
956 char *dir;
957 rfs4_ss_pn_t *ss_pn;
958 vnode_t *vp;
959 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
960
961 /* allow 2 extra bytes for '/' & NUL */
1114 * for forced expiration
1115 */
1116 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1117 cp->rc_forced_expire = 1;
1118 }
1119 break;
1120
1121 default:
1122 /* force this assert to fail */
1123 ASSERT(clr->addr_type != clr->addr_type);
1124 }
1125 }
1126
1127 /*
1128 * This is called from nfssys() in order to clear server state
1129 * for the specified client IP Address.
1130 */
1131 void
1132 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1133 {
1134 nfs4_srv_t *nsrv4;
1135 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1136 (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1137 }
1138
1139 /*
1140 * Used to initialize the NFSv4 server's state or database. All of
1141 * the tables are created and timers are set.
1142 */
1143 void
1144 rfs4_state_g_init()
1145 {
1146 extern boolean_t rfs4_cpr_callb(void *, int);
1147 /*
1148 * Add a CPR callback so that we can update client
1149 * access times to extend the lease after a suspend
1150 * and resume (using the same class as rpcmod/connmgr)
1151 */
1152 cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1153
1154 /*
1155 * NFSv4 server state databases
1156 *
1157 * Initilized when the module is loaded and used by NFSv4 state tables.
1158 * These kmem_cache free pools are used globally, the NFSv4 state
1159 * tables which make use of these kmem_cache free pools are per zone.
1160 *
1161 * initialize the global kmem_cache free pools which will be used by
1162 * the NFSv4 state tables.
1163 */
1164 /* CSTYLED */
1165 rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
1166 /* CSTYLED */
1167 rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
1168 /* CSTYLED */
1169 rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
1170 /* CSTYLED */
1171 rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
1172 /* CSTYLED */
1173 rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
1174 /* CSTYLED */
1175 rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
1176 /* CSTYLED */
1177 rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
1178 /* CSTYLED */
1179 rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1180
1181 rfs4_client_clrst = rfs4_clear_client_state;
1182 }
1183
1184
1185 /*
1186 * Used at server shutdown to cleanup all of the NFSv4 server's structures
1187 * and other state.
1188 */
1189 void
1190 rfs4_state_g_fini()
1191 {
1192 int i;
1193 /*
1194 * Cleanup the CPR callback.
1195 */
1196 if (cpr_id)
1197 (void) callb_delete(cpr_id);
1198
1199 rfs4_client_clrst = NULL;
1200
1201 /* free the NFSv4 state databases */
1202 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1203 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1204 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1205 }
1206
1207 rfs4_client_mem_cache = NULL;
1208 rfs4_clntIP_mem_cache = NULL;
1209 rfs4_openown_mem_cache = NULL;
1210 rfs4_openstID_mem_cache = NULL;
1211 rfs4_lockstID_mem_cache = NULL;
1212 rfs4_lockown_mem_cache = NULL;
1213 rfs4_file_mem_cache = NULL;
1214 rfs4_delegstID_mem_cache = NULL;
1215
1216 /* DSS: distributed stable storage */
1217 nvlist_free(rfs4_dss_oldpaths);
1218 nvlist_free(rfs4_dss_paths);
1219 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1220 }
1221
1222 /*
1223 * Used to initialize the per zone NFSv4 server's state
1224 */
1225 void
1226 rfs4_state_zone_init(nfs4_srv_t *nsrv4)
1227 {
1228 time_t start_time;
1229 int start_grace;
1230 char *dss_path = NFS4_DSS_VAR_DIR;
1231
1232 /* DSS: distributed stable storage: initialise served paths list */
1233 nsrv4->dss_pathlist = NULL;
1234
1235 /*
1236 * Set the boot time. If the server
1237 * has been restarted quickly and has had the opportunity to
1238 * service clients, then the start_time needs to be bumped
1239 * regardless. A small window but it exists...
1240 */
1241 start_time = gethrestime_sec();
1242 if (nsrv4->rfs4_start_time < start_time)
1243 nsrv4->rfs4_start_time = start_time;
1244 else
1245 nsrv4->rfs4_start_time++;
1246
1247 /*
1248 * Create the first server instance, or a new one if the server has
1249 * been restarted; see above comments on rfs4_start_time. Don't
1250 * start its grace period; that will be done later, to maximise the
1251 * clients' recovery window.
1252 */
1253 start_grace = 0;
1254 rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
1255
1256 /* reset the "first NFSv4 request" status */
1257 nsrv4->seen_first_compound = 0;
1258
1259 mutex_enter(&nsrv4->state_lock);
1260
1261 /*
1262 * If the server state database has already been initialized,
1263 * skip it
1264 */
1265 if (nsrv4->nfs4_server_state != NULL) {
1266 mutex_exit(&nsrv4->state_lock);
1267 return;
1268 }
1269
1270 rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
1271
1272 /* set the various cache timers for table creation */
1273 if (nsrv4->rfs4_client_cache_time == 0)
1274 nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
1275 if (nsrv4->rfs4_openowner_cache_time == 0)
1276 nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1277 if (nsrv4->rfs4_state_cache_time == 0)
1278 nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
1279 if (nsrv4->rfs4_lo_state_cache_time == 0)
1280 nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1281 if (nsrv4->rfs4_lockowner_cache_time == 0)
1282 nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1283 if (nsrv4->rfs4_file_cache_time == 0)
1284 nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
1285 if (nsrv4->rfs4_deleg_state_cache_time == 0)
1286 nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1287
1288 /* Create the overall database to hold all server state */
1289 nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1290
1291 /* Now create the individual tables */
1292 nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
1293 nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1294 "Client",
1295 nsrv4->rfs4_client_cache_time,
1296 2,
1297 rfs4_client_create,
1298 rfs4_client_destroy,
1299 rfs4_client_expiry,
1300 sizeof (rfs4_client_t),
1301 TABSIZE,
1302 MAXTABSZ/8, 100);
1303 nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1304 "nfs_client_id4", nfsclnt_hash,
1305 nfsclnt_compare, nfsclnt_mkkey,
1306 TRUE);
1307 nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1308 "client_id", clientid_hash,
1309 clientid_compare, clientid_mkkey,
1310 FALSE);
1311
1312 nsrv4->rfs4_clntip_cache_time = 86400 * 365; /* about a year */
1313 nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1314 "ClntIP",
1315 nsrv4->rfs4_clntip_cache_time,
1316 1,
1317 rfs4_clntip_create,
1318 rfs4_clntip_destroy,
1319 rfs4_clntip_expiry,
1320 sizeof (rfs4_clntip_t),
1321 TABSIZE,
1322 MAXTABSZ, 100);
1323 nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1324 "client_ip", clntip_hash,
1325 clntip_compare, clntip_mkkey,
1326 TRUE);
1327
1328 nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
1329 nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1330 "OpenOwner",
1331 nsrv4->rfs4_openowner_cache_time,
1332 1,
1333 rfs4_openowner_create,
1334 rfs4_openowner_destroy,
1335 rfs4_openowner_expiry,
1336 sizeof (rfs4_openowner_t),
1337 TABSIZE,
1338 MAXTABSZ, 100);
1339 nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1340 "open_owner4", openowner_hash,
1341 openowner_compare,
1342 openowner_mkkey, TRUE);
1343
1344 nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
1345 nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1346 "OpenStateID",
1347 nsrv4->rfs4_state_cache_time,
1348 3,
1349 rfs4_state_create,
1350 rfs4_state_destroy,
1351 rfs4_state_expiry,
1352 sizeof (rfs4_state_t),
1353 TABSIZE,
1354 MAXTABSZ, 100);
1355
1356 /* CSTYLED */
1357 nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1358 "Openowner-File",
1359 state_owner_file_hash,
1360 state_owner_file_compare,
1361 state_owner_file_mkkey, TRUE);
1362
1363 nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1364 "State-id", state_hash,
1365 state_compare, state_mkkey, FALSE);
1366
1367 nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1368 "File", state_file_hash,
1369 state_file_compare, state_file_mkkey,
1370 FALSE);
1371
1372 nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
1373 nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1374 "LockStateID",
1375 nsrv4->rfs4_lo_state_cache_time,
1376 2,
1377 rfs4_lo_state_create,
1378 rfs4_lo_state_destroy,
1379 rfs4_lo_state_expiry,
1380 sizeof (rfs4_lo_state_t),
1381 TABSIZE,
1382 MAXTABSZ, 100);
1383
1384 /* CSTYLED */
1385 nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1386 "lockownerxstate",
1387 lo_state_lo_hash,
1388 lo_state_lo_compare,
1389 lo_state_lo_mkkey, TRUE);
1390
1391 nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1392 "State-id",
1393 lo_state_hash, lo_state_compare,
1394 lo_state_mkkey, FALSE);
1395
1396 nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1397
1398 nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1399 "Lockowner",
1400 nsrv4->rfs4_lockowner_cache_time,
1401 2,
1402 rfs4_lockowner_create,
1403 rfs4_lockowner_destroy,
1404 rfs4_lockowner_expiry,
1405 sizeof (rfs4_lockowner_t),
1406 TABSIZE,
1407 MAXTABSZ, 100);
1408
1409 nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1410 "lock_owner4", lockowner_hash,
1411 lockowner_compare,
1412 lockowner_mkkey, TRUE);
1413
1414 /* CSTYLED */
1415 nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1416 "pid", pid_hash,
1417 pid_compare, pid_mkkey,
1418 FALSE);
1419
1420 nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
1421 nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1422 "File",
1423 nsrv4->rfs4_file_cache_time,
1424 1,
1425 rfs4_file_create,
1426 rfs4_file_destroy,
1427 NULL,
1428 sizeof (rfs4_file_t),
1429 TABSIZE,
1430 MAXTABSZ, -1);
1431
1432 nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1433 "Filehandle", file_hash,
1434 file_compare, file_mkkey, TRUE);
1435
1436 nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
1437 /* CSTYLED */
1438 nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1439 "DelegStateID",
1440 nsrv4->rfs4_deleg_state_cache_time,
1441 2,
1442 rfs4_deleg_state_create,
1443 rfs4_deleg_state_destroy,
1444 rfs4_deleg_state_expiry,
1445 sizeof (rfs4_deleg_state_t),
1446 TABSIZE,
1447 MAXTABSZ, 100);
1448 nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1449 "DelegByFileClient",
1450 deleg_hash,
1451 deleg_compare,
1452 deleg_mkkey, TRUE);
1453
1454 /* CSTYLED */
1455 nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1456 "DelegState",
1457 deleg_state_hash,
1458 deleg_state_compare,
1459 deleg_state_mkkey, FALSE);
1460
1461 mutex_exit(&nsrv4->state_lock);
1462
1463 /*
1464 * Init the stable storage.
1465 */
1466 rfs4_ss_init(nsrv4);
1467 }
1468
1469 /*
1470 * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1471 * and state.
1472 */
1473 void
1474 rfs4_state_zone_fini()
1475 {
1476 rfs4_database_t *dbp;
1477 nfs4_srv_t *nsrv4;
1478 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1479
1480 rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1481
1482 mutex_enter(&nsrv4->state_lock);
1483
1484 if (nsrv4->nfs4_server_state == NULL) {
1485 mutex_exit(&nsrv4->state_lock);
1486 return;
1487 }
1488
1489 /* destroy server instances and current instance ptr */
1490 rfs4_servinst_destroy_all(nsrv4);
1491
1492 /* reset the "first NFSv4 request" status */
1493 nsrv4->seen_first_compound = 0;
1494
1495 dbp = nsrv4->nfs4_server_state;
1496 nsrv4->nfs4_server_state = NULL;
1497
1498 rw_destroy(&nsrv4->rfs4_findclient_lock);
1499
1500 /* First stop all of the reaper threads in the database */
1501 rfs4_database_shutdown(dbp);
1502 /*
1503 * XXX workaround
1504 * Skip destrying the state database yet just in case there
1505 * are unfinished operations depending on it.
1506 */
1507 /* Now destroy/release the database tables */
1508 /* rfs4_database_destroy(dbp); */
1509
1510 /* Reset the cache timers for next time */
1511 nsrv4->rfs4_client_cache_time = 0;
1512 nsrv4->rfs4_openowner_cache_time = 0;
1513 nsrv4->rfs4_state_cache_time = 0;
1514 nsrv4->rfs4_lo_state_cache_time = 0;
1515 nsrv4->rfs4_lockowner_cache_time = 0;
1516 nsrv4->rfs4_file_cache_time = 0;
1517 nsrv4->rfs4_deleg_state_cache_time = 0;
1518
1519 mutex_exit(&nsrv4->state_lock);
1520
1521 /* clean up any dangling stable storage structures */
1522 rfs4_ss_fini(nsrv4);
1523 }
1524
1525 typedef union {
1526 struct {
1527 uint32_t start_time;
1528 uint32_t c_id;
1529 } impl_id;
1530 clientid4 id4;
1531 } cid;
1532
1533 static int foreign_stateid(stateid_t *id);
1534 static int foreign_clientid(cid *cidp);
1535 static void embed_nodeid(cid *cidp);
1536
1537 typedef union {
1538 struct {
1539 uint32_t c_id;
1540 uint32_t gen_num;
1541 } cv_impl;
1542 verifier4 confirm_verf;
1617 * If the sysadmin has used clear_locks for this
1618 * entry then forced_expire will be set and we
1619 * want this entry to be reaped. Or the entry
1620 * has exceeded its lease period.
1621 */
1622 cp_expired = (cp->rc_forced_expire ||
1623 (gethrestime_sec() - cp->rc_last_access
1624 > rfs4_lease_time));
1625
1626 if (!cp->rc_ss_remove && cp_expired)
1627 cp->rc_ss_remove = 1;
1628 return (cp_expired);
1629 }
1630
1631 /*
1632 * Remove the leaf file from all distributed stable storage paths.
1633 */
1634 static void
1635 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1636 {
1637 nfs4_srv_t *nsrv4;
1638 rfs4_servinst_t *sip;
1639 char *leaf = cp->rc_ss_pn->leaf;
1640
1641 /*
1642 * since the state files are written to all DSS
1643 * paths we must remove this leaf file instance
1644 * from all server instances.
1645 */
1646
1647 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1648 mutex_enter(&nsrv4->servinst_lock);
1649 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1650 /* remove the leaf file associated with this server instance */
1651 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1652 }
1653 mutex_exit(&nsrv4->servinst_lock);
1654 }
1655
1656 static void
1657 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1658 {
1659 int i, npaths = sip->dss_npaths;
1660
1661 for (i = 0; i < npaths; i++) {
1662 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1663 char *path, *dir;
1664 size_t pathlen;
1665
1666 /* the HA-NFSv4 path might have been failed-over away from us */
1667 if (dss_path == NULL)
1668 continue;
1669
1670 dir = dss_path->path;
1671
1672 /* allow 3 extra bytes for two '/' & a NUL */
1673 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1701 if (cp->rc_ss_remove)
1702 rfs4_dss_remove_cpleaf(cp);
1703 rfs4_ss_pnfree(cp->rc_ss_pn);
1704 }
1705
1706 /* Free the client supplied client id */
1707 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1708
1709 if (cp->rc_sysidt != LM_NOSYSID)
1710 lm_free_sysidt(cp->rc_sysidt);
1711 }
1712
1713 static bool_t
1714 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1715 {
1716 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1717 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1718 struct sockaddr *ca;
1719 cid *cidp;
1720 scid_confirm_verf *scvp;
1721 nfs4_srv_t *nsrv4;
1722
1723 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1724
1725 /* Get a clientid to give to the client */
1726 cidp = (cid *)&cp->rc_clientid;
1727 cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1728 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1729
1730 /* If we are booted as a cluster node, embed our nodeid */
1731 if (cluster_bootflags & CLUSTER_BOOTED)
1732 embed_nodeid(cidp);
1733
1734 /* Allocate and copy client's client id value */
1735 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1736 cp->rc_nfs_client.id_len = client->id_len;
1737 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1738 cp->rc_nfs_client.verifier = client->verifier;
1739
1740 /* Copy client's IP address */
1741 ca = client->cl_addr;
1742 if (ca->sa_family == AF_INET)
1743 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1744 else if (ca->sa_family == AF_INET6)
1745 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1746 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1747
1765
1766 cp->rc_cr_set = NULL;
1767
1768 cp->rc_sysidt = LM_NOSYSID;
1769
1770 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1771 offsetof(rfs4_openowner_t, ro_node));
1772
1773 /* set up the callback control structure */
1774 cp->rc_cbinfo.cb_state = CB_UNINIT;
1775 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1776 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1777 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1778
1779 /*
1780 * Associate the client_t with the current server instance.
1781 * The hold is solely to satisfy the calling requirement of
1782 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1783 */
1784 rfs4_dbe_hold(cp->rc_dbe);
1785 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1786 rfs4_dbe_rele(cp->rc_dbe);
1787
1788 return (TRUE);
1789 }
1790
1791 /*
1792 * Caller wants to generate/update the setclientid_confirm verifier
1793 * associated with a client. This is done during the SETCLIENTID
1794 * processing.
1795 */
1796 void
1797 rfs4_client_scv_next(rfs4_client_t *cp)
1798 {
1799 scid_confirm_verf *scvp;
1800
1801 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1802 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1803 scvp->cv_impl.gen_num++;
1804 }
1805
1806 void
1807 rfs4_client_rele(rfs4_client_t *cp)
1808 {
1809 rfs4_dbe_rele(cp->rc_dbe);
1810 }
1811
1812 rfs4_client_t *
1813 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1814 {
1815 rfs4_client_t *cp;
1816 nfs4_srv_t *nsrv4;
1817 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1818
1819
1820 if (oldcp) {
1821 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1822 rfs4_dbe_hide(oldcp->rc_dbe);
1823 } else {
1824 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1825 }
1826
1827 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1828 create, (void *)client, RFS4_DBS_VALID);
1829
1830 if (oldcp)
1831 rfs4_dbe_unhide(oldcp->rc_dbe);
1832
1833 rw_exit(&nsrv4->rfs4_findclient_lock);
1834
1835 return (cp);
1836 }
1837
1838 rfs4_client_t *
1839 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1840 {
1841 rfs4_client_t *cp;
1842 bool_t create = FALSE;
1843 cid *cidp = (cid *)&clientid;
1844 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1845
1846 /* If we're a cluster and the nodeid isn't right, short-circuit */
1847 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1848 return (NULL);
1849
1850 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1851
1852 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1853 &create, NULL, RFS4_DBS_VALID);
1854
1855 rw_exit(&nsrv4->rfs4_findclient_lock);
1856
1857 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1858 rfs4_client_rele(cp);
1859 return (NULL);
1860 } else {
1861 return (cp);
1862 }
1863 }
1864
1865 static uint32_t
1866 clntip_hash(void *key)
1867 {
1868 struct sockaddr *addr = key;
1869 int i, len = 0;
1870 uint32_t hash = 0;
1871 char *ptr;
1872
1873 if (addr->sa_family == AF_INET) {
1874 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1875 len = sizeof (struct in_addr);
1943 {
1944 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1945 struct sockaddr *ca = (struct sockaddr *)arg;
1946
1947 /* Copy client's IP address */
1948 if (ca->sa_family == AF_INET)
1949 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1950 else if (ca->sa_family == AF_INET6)
1951 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1952 else
1953 return (FALSE);
1954 cp->ri_no_referrals = 1;
1955
1956 return (TRUE);
1957 }
1958
1959 rfs4_clntip_t *
1960 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1961 {
1962 rfs4_clntip_t *cp;
1963 nfs4_srv_t *nsrv4;
1964
1965 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1966
1967 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1968
1969 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1970 create, addr, RFS4_DBS_VALID);
1971
1972 rw_exit(&nsrv4->rfs4_findclient_lock);
1973
1974 return (cp);
1975 }
1976
1977 void
1978 rfs4_invalidate_clntip(struct sockaddr *addr)
1979 {
1980 rfs4_clntip_t *cp;
1981 bool_t create = FALSE;
1982 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1983
1984 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1985
1986 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1987 &create, NULL, RFS4_DBS_VALID);
1988 if (cp == NULL) {
1989 rw_exit(&nsrv4->rfs4_findclient_lock);
1990 return;
1991 }
1992 rfs4_dbe_invalidate(cp->ri_dbe);
1993 rfs4_dbe_rele(cp->ri_dbe);
1994
1995 rw_exit(&nsrv4->rfs4_findclient_lock);
1996 }
1997
1998 bool_t
1999 rfs4_lease_expired(rfs4_client_t *cp)
2000 {
2001 bool_t rc;
2002
2003 rfs4_dbe_lock(cp->rc_dbe);
2004
2005 /*
2006 * If the admin has executed clear_locks for this
2007 * client id, force expire will be set, so no need
2008 * to calculate anything because it's "outa here".
2009 */
2010 if (cp->rc_forced_expire) {
2011 rc = TRUE;
2012 } else {
2013 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2014 }
2015
2123
2124 /* Free the open owner id */
2125 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2126 }
2127
2128 void
2129 rfs4_openowner_rele(rfs4_openowner_t *oo)
2130 {
2131 rfs4_dbe_rele(oo->ro_dbe);
2132 }
2133
2134 static bool_t
2135 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2136 {
2137 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2138 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2139 open_owner4 *openowner = &argp->ro_owner;
2140 seqid4 seqid = argp->ro_open_seqid;
2141 rfs4_client_t *cp;
2142 bool_t create = FALSE;
2143 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2144
2145 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2146
2147 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2148 &openowner->clientid,
2149 &create, NULL, RFS4_DBS_VALID);
2150
2151 rw_exit(&nsrv4->rfs4_findclient_lock);
2152
2153 if (cp == NULL)
2154 return (FALSE);
2155
2156 oo->ro_reply_fh.nfs_fh4_len = 0;
2157 oo->ro_reply_fh.nfs_fh4_val = NULL;
2158
2159 oo->ro_owner.clientid = openowner->clientid;
2160 oo->ro_owner.owner_val =
2161 kmem_alloc(openowner->owner_len, KM_SLEEP);
2162
2163 bcopy(openowner->owner_val,
2164 oo->ro_owner.owner_val, openowner->owner_len);
2165
2166 oo->ro_owner.owner_len = openowner->owner_len;
2167
2168 oo->ro_need_confirm = TRUE;
2169
2170 rfs4_sw_init(&oo->ro_sw);
2171
2173 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2174 oo->ro_client = cp;
2175 oo->ro_cr_set = NULL;
2176
2177 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2178 offsetof(rfs4_state_t, rs_node));
2179
2180 /* Insert openowner into client's open owner list */
2181 rfs4_dbe_lock(cp->rc_dbe);
2182 list_insert_tail(&cp->rc_openownerlist, oo);
2183 rfs4_dbe_unlock(cp->rc_dbe);
2184
2185 return (TRUE);
2186 }
2187
2188 rfs4_openowner_t *
2189 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2190 {
2191 rfs4_openowner_t *oo;
2192 rfs4_openowner_t arg;
2193 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2194
2195 arg.ro_owner = *openowner;
2196 arg.ro_open_seqid = seqid;
2197 /* CSTYLED */
2198 oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2199 create, &arg, RFS4_DBS_VALID);
2200
2201 return (oo);
2202 }
2203
2204 void
2205 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2206 {
2207
2208 rfs4_dbe_lock(oo->ro_dbe);
2209
2210 oo->ro_open_seqid++;
2211
2212 rfs4_dbe_unlock(oo->ro_dbe);
2213 }
2214
2215 void
2216 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2217 {
2218
2321 }
2322
2323 /* ARGSUSED */
2324 static bool_t
2325 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2326 {
2327 /*
2328 * Since expiry is called with no other references on
2329 * this struct, go ahead and have it removed.
2330 */
2331 return (TRUE);
2332 }
2333
2334 static bool_t
2335 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2336 {
2337 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2338 lock_owner4 *lockowner = (lock_owner4 *)arg;
2339 rfs4_client_t *cp;
2340 bool_t create = FALSE;
2341 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2342
2343 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2344
2345 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2346 &lockowner->clientid,
2347 &create, NULL, RFS4_DBS_VALID);
2348
2349 rw_exit(&nsrv4->rfs4_findclient_lock);
2350
2351 if (cp == NULL)
2352 return (FALSE);
2353
2354 /* Reference client */
2355 lo->rl_client = cp;
2356 lo->rl_owner.clientid = lockowner->clientid;
2357 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2358 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2359 lockowner->owner_len);
2360 lo->rl_owner.owner_len = lockowner->owner_len;
2361 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2362
2363 return (TRUE);
2364 }
2365
2366 rfs4_lockowner_t *
2367 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2368 {
2369 rfs4_lockowner_t *lo;
2370 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2371
2372 /* CSTYLED */
2373 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2374 create, lockowner, RFS4_DBS_VALID);
2375
2376 return (lo);
2377 }
2378
2379 rfs4_lockowner_t *
2380 rfs4_findlockowner_by_pid(pid_t pid)
2381 {
2382 rfs4_lockowner_t *lo;
2383 bool_t create = FALSE;
2384 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2385
2386 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2387 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2388
2389 return (lo);
2390 }
2391
2392
2393 static uint32_t
2394 file_hash(void *key)
2395 {
2396 return (ADDRHASH(key));
2397 }
2398
2399 static void *
2400 file_mkkey(rfs4_entry_t u_entry)
2401 {
2402 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2403
2404 return (fp->rf_vp);
2405 }
2406
2477
2478 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2479 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2480
2481 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2482
2483 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2484
2485 mutex_enter(&vp->v_vsd_lock);
2486 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2487 mutex_exit(&vp->v_vsd_lock);
2488
2489 return (TRUE);
2490 }
2491
2492 rfs4_file_t *
2493 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2494 {
2495 rfs4_file_t *fp;
2496 rfs4_fcreate_arg arg;
2497 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2498
2499 arg.vp = vp;
2500 arg.fh = fh;
2501
2502 if (*create == TRUE)
2503 /* CSTYLED */
2504 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2505 &arg, RFS4_DBS_VALID);
2506 else {
2507 mutex_enter(&vp->v_vsd_lock);
2508 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2509 if (fp) {
2510 rfs4_dbe_lock(fp->rf_dbe);
2511 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2512 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2513 rfs4_dbe_unlock(fp->rf_dbe);
2514 fp = NULL;
2515 } else {
2516 rfs4_dbe_hold(fp->rf_dbe);
2517 rfs4_dbe_unlock(fp->rf_dbe);
2518 }
2519 }
2520 mutex_exit(&vp->v_vsd_lock);
2521 }
2522 return (fp);
2523 }
2524
2525 /*
2526 * Find a file in the db and once it is located, take the rw lock.
2527 * Need to check the vnode pointer and if it does not exist (it was
2528 * removed between the db location and check) redo the find. This
2529 * assumes that a file struct that has a NULL vnode pointer is marked
2530 * at 'invalid' and will not be found in the db the second time
2531 * around.
2532 */
2533 rfs4_file_t *
2534 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2535 {
2536 rfs4_file_t *fp;
2537 rfs4_fcreate_arg arg;
2538 bool_t screate = *create;
2539 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2540
2541 if (screate == FALSE) {
2542 mutex_enter(&vp->v_vsd_lock);
2543 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2544 if (fp) {
2545 rfs4_dbe_lock(fp->rf_dbe);
2546 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2547 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2548 rfs4_dbe_unlock(fp->rf_dbe);
2549 mutex_exit(&vp->v_vsd_lock);
2550 fp = NULL;
2551 } else {
2552 rfs4_dbe_hold(fp->rf_dbe);
2553 rfs4_dbe_unlock(fp->rf_dbe);
2554 mutex_exit(&vp->v_vsd_lock);
2555 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2556 if (fp->rf_vp == NULL) {
2557 rw_exit(&fp->rf_file_rwlock);
2558 rfs4_file_rele(fp);
2559 fp = NULL;
2560 }
2561 }
2562 } else {
2563 mutex_exit(&vp->v_vsd_lock);
2564 }
2565 } else {
2566 retry:
2567 arg.vp = vp;
2568 arg.fh = fh;
2569
2570 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2571 create, &arg, RFS4_DBS_VALID);
2572 if (fp != NULL) {
2573 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2574 if (fp->rf_vp == NULL) {
2575 rw_exit(&fp->rf_file_rwlock);
2576 rfs4_file_rele(fp);
2577 *create = screate;
2578 goto retry;
2579 }
2580 }
2581 }
2582
2583 return (fp);
2584 }
2585
2586 static uint32_t
2587 lo_state_hash(void *key)
2588 {
2589 stateid_t *id = key;
2590
2591 return (id->bits.ident+id->bits.pid);
2706 list_insert_tail(&sp->rs_lostatelist, lsp);
2707 rfs4_dbe_hold(sp->rs_dbe);
2708 rfs4_dbe_unlock(sp->rs_dbe);
2709
2710 return (TRUE);
2711 }
2712
2713 void
2714 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2715 {
2716 if (unlock_fp == TRUE)
2717 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2718 rfs4_dbe_rele(lsp->rls_dbe);
2719 }
2720
2721 static rfs4_lo_state_t *
2722 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2723 {
2724 rfs4_lo_state_t *lsp;
2725 bool_t create = FALSE;
2726 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2727
2728 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2729 &create, NULL, RFS4_DBS_VALID);
2730 if (lock_fp == TRUE && lsp != NULL)
2731 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2732
2733 return (lsp);
2734 }
2735
2736
2737 static uint32_t
2738 lo_state_lo_hash(void *key)
2739 {
2740 rfs4_lo_state_t *lsp = key;
2741
2742 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2743 }
2744
2745 static bool_t
2746 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2747 {
2748 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2749 rfs4_lo_state_t *keyp = key;
2750
2751 return (keyp->rls_locker == lsp->rls_locker &&
2752 keyp->rls_state == lsp->rls_state);
2753 }
2754
2755 static void *
2756 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2757 {
2758 return (u_entry);
2759 }
2760
2761 rfs4_lo_state_t *
2762 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2763 bool_t *create)
2764 {
2765 rfs4_lo_state_t *lsp;
2766 rfs4_lo_state_t arg;
2767 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2768
2769 arg.rls_locker = lo;
2770 arg.rls_state = sp;
2771
2772 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2773 &arg, create, &arg, RFS4_DBS_VALID);
2774
2775 return (lsp);
2776 }
2777
2778 static stateid_t
2779 get_stateid(id_t eid)
2780 {
2781 stateid_t id;
2782 nfs4_srv_t *nsrv4;
2783
2784 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2785
2786 id.bits.boottime = nsrv4->rfs4_start_time;
2787 id.bits.ident = eid;
2788 id.bits.chgseq = 0;
2789 id.bits.type = 0;
2790 id.bits.pid = 0;
2791
2792 /*
2793 * If we are booted as a cluster node, embed our nodeid.
2794 * We've already done sanity checks in rfs4_client_create() so no
2795 * need to repeat them here.
2796 */
2797 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2798 clconf_get_nodeid() : 0;
2799
2800 return (id);
2801 }
2802
2803 /*
2804 * For use only when booted as a cluster node.
2805 * Returns TRUE if the embedded nodeid indicates that this stateid was
2806 * generated on another node.
3022 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3023 {
3024 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3025
3026 /* return delegation if necessary */
3027 rfs4_return_deleg(dsp, FALSE);
3028
3029 /* We're done with the file */
3030 rfs4_file_rele(dsp->rds_finfo);
3031 dsp->rds_finfo = NULL;
3032
3033 /* And now with the client */
3034 rfs4_client_rele(dsp->rds_client);
3035 dsp->rds_client = NULL;
3036 }
3037
3038 rfs4_deleg_state_t *
3039 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3040 {
3041 rfs4_deleg_state_t ds, *dsp;
3042 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3043
3044 ds.rds_client = sp->rs_owner->ro_client;
3045 ds.rds_finfo = sp->rs_finfo;
3046
3047 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3048 create, &ds, RFS4_DBS_VALID);
3049
3050 return (dsp);
3051 }
3052
3053 rfs4_deleg_state_t *
3054 rfs4_finddelegstate(stateid_t *id)
3055 {
3056 rfs4_deleg_state_t *dsp;
3057 bool_t create = FALSE;
3058 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3059
3060 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3061 id, &create, NULL, RFS4_DBS_VALID);
3062
3063 return (dsp);
3064 }
3065
3066 void
3067 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3068 {
3069 rfs4_dbe_rele(dsp->rds_dbe);
3070 }
3071
3072 void
3073 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3074 {
3075
3076 rfs4_dbe_lock(lsp->rls_dbe);
3077
3078 /*
3079 * If we are skipping sequence id checking, this means that
3080 * this is the first lock request and therefore the sequence
3081 * id does not need to be updated. This only happens on the
3160 if (sp->rs_closed == TRUE)
3161 return (FALSE);
3162
3163 return (fp == sp->rs_finfo);
3164 }
3165
3166 static void *
3167 state_file_mkkey(rfs4_entry_t u_entry)
3168 {
3169 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3170
3171 return (sp->rs_finfo);
3172 }
3173
3174 rfs4_state_t *
3175 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3176 bool_t *create)
3177 {
3178 rfs4_state_t *sp;
3179 rfs4_state_t key;
3180 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3181
3182 key.rs_owner = oo;
3183 key.rs_finfo = fp;
3184
3185 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3186 &key, create, &key, RFS4_DBS_VALID);
3187
3188 return (sp);
3189 }
3190
3191 /* This returns ANY state struct that refers to this file */
3192 static rfs4_state_t *
3193 rfs4_findstate_by_file(rfs4_file_t *fp)
3194 {
3195 bool_t create = FALSE;
3196 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3197
3198 return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3199 &create, fp, RFS4_DBS_VALID));
3200 }
3201
3202 static bool_t
3203 rfs4_state_expiry(rfs4_entry_t u_entry)
3204 {
3205 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3206
3207 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3208 return (TRUE);
3209
3210 if (sp->rs_closed == TRUE &&
3211 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3212 > rfs4_lease_time))
3213 return (TRUE);
3214
3215 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3216 > rfs4_lease_time));
3217 }
3218
3229 sp->rs_stateid.bits.type = OPENID;
3230 sp->rs_owner = oo;
3231 sp->rs_finfo = fp;
3232
3233 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3234 offsetof(rfs4_lo_state_t, rls_node));
3235
3236 /* Insert state on per open owner's list */
3237 rfs4_dbe_lock(oo->ro_dbe);
3238 list_insert_tail(&oo->ro_statelist, sp);
3239 rfs4_dbe_unlock(oo->ro_dbe);
3240
3241 return (TRUE);
3242 }
3243
3244 static rfs4_state_t *
3245 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3246 {
3247 rfs4_state_t *sp;
3248 bool_t create = FALSE;
3249 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3250
3251 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3252 &create, NULL, find_invalid);
3253 if (lock_fp == TRUE && sp != NULL)
3254 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3255
3256 return (sp);
3257 }
3258
3259 void
3260 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3261 cred_t *cr)
3262 {
3263 /* Remove the associated lo_state owners */
3264 if (!lock_held)
3265 rfs4_dbe_lock(sp->rs_dbe);
3266
3267 /*
3268 * If refcnt == 0, the dbe is about to be destroyed.
3269 * lock state will be released by the reaper thread.
3270 */
3271
3299 }
3300
3301 void
3302 rfs4_client_close(rfs4_client_t *cp)
3303 {
3304 /* Mark client as going away. */
3305 rfs4_dbe_lock(cp->rc_dbe);
3306 rfs4_dbe_invalidate(cp->rc_dbe);
3307 rfs4_dbe_unlock(cp->rc_dbe);
3308
3309 rfs4_client_state_remove(cp);
3310
3311 /* Release the client */
3312 rfs4_client_rele(cp);
3313 }
3314
3315 nfsstat4
3316 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3317 {
3318 cid *cidp = (cid *) cp;
3319 nfs4_srv_t *nsrv4;
3320
3321 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3322
3323 /*
3324 * If we are booted as a cluster node, check the embedded nodeid.
3325 * If it indicates that this clientid was generated on another node,
3326 * inform the client accordingly.
3327 */
3328 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3329 return (NFS4ERR_STALE_CLIENTID);
3330
3331 /*
3332 * If the server start time matches the time provided
3333 * by the client (via the clientid) and this is NOT a
3334 * setclientid_confirm then return EXPIRED.
3335 */
3336 if (!setclid_confirm &&
3337 cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3338 return (NFS4ERR_EXPIRED);
3339
3340 return (NFS4ERR_STALE_CLIENTID);
3341 }
3342
3343 /*
3344 * This is used when a stateid has not been found amongst the
3345 * current server's state. Check the stateid to see if it
3346 * was from this server instantiation or not.
3347 */
3348 static nfsstat4
3349 what_stateid_error(stateid_t *id, stateid_type_t type)
3350 {
3351 nfs4_srv_t *nsrv4;
3352
3353 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3354
3355 /* If we are booted as a cluster node, was stateid locally generated? */
3356 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3357 return (NFS4ERR_STALE_STATEID);
3358
3359 /* If types don't match then no use checking further */
3360 if (type != id->bits.type)
3361 return (NFS4ERR_BAD_STATEID);
3362
3363 /* From a different server instantiation, return STALE */
3364 if (id->bits.boottime != nsrv4->rfs4_start_time)
3365 return (NFS4ERR_STALE_STATEID);
3366
3367 /*
3368 * From this server but the state is most likely beyond lease
3369 * timeout: return NFS4ERR_EXPIRED. However, there is the
3370 * case of a delegation stateid. For delegations, there is a
3371 * case where the state can be removed without the client's
3372 * knowledge/consent: revocation. In the case of delegation
3373 * revocation, the delegation state will be removed and will
3374 * not be found. If the client does something like a
3375 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3376 * that has been revoked, the server should return BAD_STATEID
3377 * instead of the more common EXPIRED error.
3378 */
3379 if (id->bits.boottime == nsrv4->rfs4_start_time) {
3380 if (type == DELEGID)
3381 return (NFS4ERR_BAD_STATEID);
3382 else
3383 return (NFS4ERR_EXPIRED);
3384 }
3385
3386 return (NFS4ERR_BAD_STATEID);
3387 }
3388
3389 /*
3390 * Used later on to find the various state structs. When called from
3391 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3392 * taken (it is not needed) and helps on the read/write path with
3393 * respect to performance.
3394 */
3395 static nfsstat4
3396 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3397 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3398 {
3399 stateid_t *id = (stateid_t *)stateid;
3861
3862 /*
3863 * This is a special function in that for the file struct provided the
3864 * server wants to remove/close all current state associated with the
3865 * file. The prime use of this would be with OP_REMOVE to force the
3866 * release of state and particularly of file locks.
3867 *
3868 * There is an assumption that there are no delegations outstanding on
3869 * this file at this point. The caller should have waited for those
3870 * to be returned or revoked.
3871 */
3872 void
3873 rfs4_close_all_state(rfs4_file_t *fp)
3874 {
3875 rfs4_state_t *sp;
3876
3877 rfs4_dbe_lock(fp->rf_dbe);
3878
3879 #ifdef DEBUG
3880 /* only applies when server is handing out delegations */
3881 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3882 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3883 #endif
3884
3885 /* No delegations for this file */
3886 ASSERT(list_is_empty(&fp->rf_delegstatelist));
3887
3888 /* Make sure that it can not be found */
3889 rfs4_dbe_invalidate(fp->rf_dbe);
3890
3891 if (fp->rf_vp == NULL) {
3892 rfs4_dbe_unlock(fp->rf_dbe);
3893 return;
3894 }
3895 rfs4_dbe_unlock(fp->rf_dbe);
3896
3897 /*
3898 * Hold as writer to prevent other server threads from
3899 * processing requests related to the file while all state is
3900 * being removed.
3901 */
4071 }
4072 mutex_enter(&vp->v_vsd_lock);
4073 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4074 mutex_exit(&vp->v_vsd_lock);
4075 VN_RELE(vp);
4076 fp->rf_vp = NULL;
4077 }
4078 rfs4_dbe_invalidate(fp->rf_dbe);
4079 }
4080 }
4081
4082 /*
4083 * Given a directory that is being unexported, cleanup/release all
4084 * state in the server that refers to objects residing underneath this
4085 * particular export. The ordering of the release is important.
4086 * Lock_owner, then state and then file.
4087 */
4088 void
4089 rfs4_clean_state_exi(struct exportinfo *exi)
4090 {
4091 nfs4_srv_t *nsrv4;
4092
4093 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
4094 mutex_enter(&nsrv4->state_lock);
4095
4096 if (nsrv4->nfs4_server_state == NULL) {
4097 mutex_exit(&nsrv4->state_lock);
4098 return;
4099 }
4100
4101 /* CSTYLED */
4102 rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4103 rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4104 /* CSTYLED */
4105 rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4106 rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4107
4108 mutex_exit(&nsrv4->state_lock);
4109 }
|