1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 #include <sys/systm.h>
27 #include <sys/kmem.h>
28 #include <sys/cmn_err.h>
29 #include <sys/atomic.h>
30 #include <sys/clconf.h>
31 #include <sys/cladm.h>
32 #include <sys/flock.h>
33 #include <nfs/export.h>
34 #include <nfs/nfs.h>
35 #include <nfs/nfs4.h>
36 #include <nfs/nfssys.h>
37 #include <nfs/lm.h>
38 #include <sys/pathname.h>
39 #include <sys/sdt.h>
40 #include <sys/nvpair.h>
41
42 extern u_longlong_t nfs4_srv_caller_id;
43
44 extern time_t rfs4_start_time;
45 extern uint_t nfs4_srv_vkey;
46
/* Special stateid whose first member and all twelve following bytes are 0. */
stateid4 special0 = {
	0,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};

/*
 * Special stateid with every bit set: 0xffffffff followed by twelve
 * 0xff bytes.
 */
stateid4 special1 = {
	0xffffffff,
	{
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff
	}
};


/*
 * Nonzero when stateid4_cmp() returns nonzero for `id' against either of
 * the two special stateids above.
 * NOTE(review): presumably stateid4_cmp() returns nonzero on a match --
 * confirm against its definition.
 */
#define	ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
			stateid4_cmp(id, &special1))
64
65 /* For embedding the cluster nodeid into our clientid */
66 #define CLUSTER_NODEID_SHIFT 24
67 #define CLUSTER_MAX_NODEID 255
68
69 #ifdef DEBUG
70 int rfs4_debug;
71 #endif
72
73 static uint32_t rfs4_database_debug = 0x00;
74
75 static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf);
76 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
77 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
78 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
79
80 /*
81 * Couple of simple init/destroy functions for a general waiter
82 */
83 void
84 rfs4_sw_init(rfs4_state_wait_t *swp)
85 {
86 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
87 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
88 swp->sw_active = FALSE;
89 swp->sw_wait_count = 0;
90 }
91
92 void
93 rfs4_sw_destroy(rfs4_state_wait_t *swp)
94 {
95 mutex_destroy(swp->sw_cv_lock);
104 swp->sw_wait_count++;
105 cv_wait(swp->sw_cv, swp->sw_cv_lock);
106 swp->sw_wait_count--;
107 }
108 ASSERT(swp->sw_active == FALSE);
109 swp->sw_active = TRUE;
110 mutex_exit(swp->sw_cv_lock);
111 }
112
113 void
114 rfs4_sw_exit(rfs4_state_wait_t *swp)
115 {
116 mutex_enter(swp->sw_cv_lock);
117 ASSERT(swp->sw_active == TRUE);
118 swp->sw_active = FALSE;
119 if (swp->sw_wait_count != 0)
120 cv_broadcast(swp->sw_cv);
121 mutex_exit(swp->sw_cv_lock);
122 }
123
124 /*
125 * CPR callback id -- not related to v4 callbacks
126 */
127 static callb_id_t cpr_id = 0;
128
129 static void
130 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
131 {
132 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
133 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
134
135 if (sres->status == NFS4ERR_DENIED) {
136 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
137 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
138 }
139 }
140
141 static void
142 deep_lock_free(LOCK4res *res)
143 {
144 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
145
146 if (res->status == NFS4ERR_DENIED)
147 kmem_free(lo->owner_val, lo->owner_len);
148 }
149
150 static void
151 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
152 {
153 nfsace4 *sacep, *dacep;
154
155 if (sres->status != NFS4_OK) {
156 return;
157 }
158
159 dres->attrset = sres->attrset;
160
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
 * correctly (I hope) and will not change by much. The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader-writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
267
/*
 * TABSIZE is 17 when DEBUG is defined and 2047 otherwise.  It is passed
 * to every rfs4_table_create() call made by rfs4_state_init().
 */
#ifdef DEBUG
#define	TABSIZE 17
#else
#define	TABSIZE 2047
#endif

/* Cast the key to unsigned long and discard its low three bits. */
#define	ADDRHASH(key) ((unsigned long)(key) >> 3)
275
/* Used to serialize create/destroy of rfs4_server_state database */
kmutex_t	rfs4_state_lock;
/* NULL until rfs4_state_init() creates it; reset to NULL by rfs4_state_fini() */
static rfs4_database_t *rfs4_server_state = NULL;

/* Used to serialize lookups of clientids */
static krwlock_t	rfs4_findclient_lock;

/*
 * For now this "table" is exposed so that the CPR callback
 * function can tromp through it..
 */
rfs4_table_t *rfs4_client_tab;

/*
 * The tables and indices below are all created in rfs4_state_init().
 * Each index is declared next to the table it is built on; the two
 * indices that follow are built on rfs4_client_tab.
 */
static rfs4_index_t *rfs4_clientid_idx;
static rfs4_index_t *rfs4_nfsclnt_idx;
static rfs4_table_t *rfs4_clntip_tab;
static rfs4_index_t *rfs4_clntip_idx;
static rfs4_table_t *rfs4_openowner_tab;
static rfs4_index_t *rfs4_openowner_idx;
static rfs4_table_t *rfs4_state_tab;
static rfs4_index_t *rfs4_state_idx;
static rfs4_index_t *rfs4_state_owner_file_idx;
static rfs4_index_t *rfs4_state_file_idx;
static rfs4_table_t *rfs4_lo_state_tab;
static rfs4_index_t *rfs4_lo_state_idx;
static rfs4_index_t *rfs4_lo_state_owner_idx;
static rfs4_table_t *rfs4_lockowner_tab;
static rfs4_index_t *rfs4_lockowner_idx;
static rfs4_index_t *rfs4_lockowner_pid_idx;
static rfs4_table_t *rfs4_file_tab;
static rfs4_index_t *rfs4_file_idx;
static rfs4_table_t *rfs4_deleg_state_tab;
static rfs4_index_t *rfs4_deleg_idx;
static rfs4_index_t *rfs4_deleg_state_idx;
310
/*
 * Size limit passed to rfs4_table_create().  The expansion is
 * parenthesized so the macro behaves as a single value inside larger
 * expressions (for example as a divisor or right-hand operand).
 */
#define	MAXTABSZ (1024 * 1024)
312
313 /* The values below are rfs4_lease_time units */
314
/*
 * Default cache times, selected by DEBUG.  rfs4_state_init() applies a
 * default only when the matching variable below is still 0, then
 * multiplies the variable by rfs4_lease_time.
 */
#ifdef DEBUG
#define	CLIENT_CACHE_TIME 1
#define	OPENOWNER_CACHE_TIME 1
#define	STATE_CACHE_TIME 1
#define	LO_STATE_CACHE_TIME 1
#define	LOCKOWNER_CACHE_TIME 1
#define	FILE_CACHE_TIME 3
#define	DELEG_STATE_CACHE_TIME 1
#else
#define	CLIENT_CACHE_TIME 10
#define	OPENOWNER_CACHE_TIME 5
#define	STATE_CACHE_TIME 1
#define	LO_STATE_CACHE_TIME 1
#define	LOCKOWNER_CACHE_TIME 3
#define	FILE_CACHE_TIME 40
#define	DELEG_STATE_CACHE_TIME 1
#endif


/*
 * Per-table cache times.  All but rfs4_clntip_cache_time are reset to 0
 * by rfs4_state_fini(); rfs4_clntip_cache_time is instead assigned a
 * fixed value (86400 * 365) on every rfs4_state_init().
 */
static time_t rfs4_client_cache_time = 0;
static time_t rfs4_clntip_cache_time = 0;
static time_t rfs4_openowner_cache_time = 0;
static time_t rfs4_state_cache_time = 0;
static time_t rfs4_lo_state_cache_time = 0;
static time_t rfs4_lockowner_cache_time = 0;
static time_t rfs4_file_cache_time = 0;
static time_t rfs4_deleg_state_cache_time = 0;
342
343 static bool_t rfs4_client_create(rfs4_entry_t, void *);
344 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
345 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
346 static void rfs4_client_destroy(rfs4_entry_t);
347 static bool_t rfs4_client_expiry(rfs4_entry_t);
348 static uint32_t clientid_hash(void *);
349 static bool_t clientid_compare(rfs4_entry_t, void *);
350 static void *clientid_mkkey(rfs4_entry_t);
351 static uint32_t nfsclnt_hash(void *);
352 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
353 static void *nfsclnt_mkkey(rfs4_entry_t);
354 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
355 static void rfs4_clntip_destroy(rfs4_entry_t);
356 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
357 static uint32_t clntip_hash(void *);
358 static bool_t clntip_compare(rfs4_entry_t, void *);
359 static void *clntip_mkkey(rfs4_entry_t);
360 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
361 static void rfs4_openowner_destroy(rfs4_entry_t);
362 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
688 cl_ss->ss_pn = rfs4_ss_movestate(
689 statedir, destdir, dep->d_name);
690 } else {
691 cl_ss->ss_pn = ss_pn;
692 }
693 insque(cl_ss, oldstate);
694 } else {
695 rfs4_ss_pnfree(ss_pn);
696 }
697 }
698 }
699
700 out:
701 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
702 VN_RELE(dvp);
703 if (dirt)
704 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
705 }
706
707 static void
708 rfs4_ss_init(void)
709 {
710 int npaths = 1;
711 char *default_dss_path = NFS4_DSS_VAR_DIR;
712
713 /* read the default stable storage state */
714 rfs4_dss_readstate(npaths, &default_dss_path);
715
716 rfs4_ss_enabled = 1;
717 }
718
719 static void
720 rfs4_ss_fini(void)
721 {
722 rfs4_servinst_t *sip;
723
724 mutex_enter(&rfs4_servinst_lock);
725 sip = rfs4_cur_servinst;
726 while (sip != NULL) {
727 rfs4_dss_clear_oldstate(sip);
728 sip = sip->next;
729 }
730 mutex_exit(&rfs4_servinst_lock);
731 }
732
733 /*
734 * Remove all oldstate files referenced by this servinst.
735 */
736 static void
737 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
738 {
739 rfs4_oldstate_t *os_head, *osp;
740
741 rw_enter(&sip->oldstate_lock, RW_WRITER);
742 os_head = sip->oldstate;
743
744 if (os_head == NULL) {
745 rw_exit(&sip->oldstate_lock);
746 return;
747 }
748
749 /* skip dummy entry */
750 osp = os_head->next;
754
755 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
756
757 if (osp->cl_id4.id_val)
758 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
759 rfs4_ss_pnfree(osp->ss_pn);
760
761 os_next = osp->next;
762 remque(osp);
763 kmem_free(osp, sizeof (rfs4_oldstate_t));
764 osp = os_next;
765 }
766
767 rw_exit(&sip->oldstate_lock);
768 }
769
770 /*
771 * Form the state and oldstate paths, and read in the stable storage files.
772 */
773 void
774 rfs4_dss_readstate(int npaths, char **paths)
775 {
776 int i;
777 char *state, *oldstate;
778
779 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
780 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
781
782 for (i = 0; i < npaths; i++) {
783 char *path = paths[i];
784
785 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
786 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
787
788 /*
789 * Populate the current server instance's oldstate list.
790 *
791 * 1. Read stable storage data from old state directory,
792 * leaving its contents alone.
793 *
794 * 2. Read stable storage data from state directory,
795 * and move the latter's contents to old state
796 * directory.
797 */
798 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL);
799 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate);
800 }
801
802 kmem_free(state, MAXPATHLEN);
803 kmem_free(oldstate, MAXPATHLEN);
804 }
805
806
807 /*
808 * Check if we are still in grace and if the client can be
809 * granted permission to perform reclaims.
810 */
811 void
812 rfs4_ss_chkclid(rfs4_client_t *cp)
813 {
814 rfs4_servinst_t *sip;
815
816 /*
817 * It should be sufficient to check the oldstate data for just
818 * this client's instance. However, since our per-instance
819 * client grouping is solely temporal, HA-NFSv4 RG failover
820 * might result in clients of the same RG being partitioned into
821 * separate instances.
822 *
823 * Until the client grouping is improved, we must check the
824 * oldstate data for all instances with an active grace period.
825 *
826 * This also serves as the mechanism to remove stale oldstate data.
827 * The first time we check an instance after its grace period has
828 * expired, the oldstate data should be cleared.
829 *
830 * Start at the current instance, and walk the list backwards
831 * to the first.
832 */
833 mutex_enter(&rfs4_servinst_lock);
834 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
835 rfs4_ss_chkclid_sip(cp, sip);
836
837 /* if the above check found this client, we're done */
838 if (cp->rc_can_reclaim)
839 break;
840 }
841 mutex_exit(&rfs4_servinst_lock);
842 }
843
844 static void
845 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
846 {
847 rfs4_oldstate_t *osp, *os_head;
848
849 /* short circuit everything if this server instance has no oldstate */
850 rw_enter(&sip->oldstate_lock, RW_READER);
851 os_head = sip->oldstate;
852 rw_exit(&sip->oldstate_lock);
853 if (os_head == NULL)
854 return;
855
856 /*
857 * If this server instance is no longer in a grace period then
858 * the client won't be able to reclaim. No further need for this
859 * instance's oldstate data, so it can be cleared.
860 */
861 if (!rfs4_servinst_in_grace(sip))
871 while (osp != os_head) {
872 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
873 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
874 osp->cl_id4.id_len) == 0) {
875 cp->rc_can_reclaim = 1;
876 break;
877 }
878 }
879 osp = osp->next;
880 }
881
882 rw_exit(&sip->oldstate_lock);
883 }
884
885 /*
886 * Place client information into stable storage: 1/3.
887 * First, generate the leaf filename, from the client's IP address and
888 * the server-generated short-hand clientid.
889 */
890 void
891 rfs4_ss_clid(rfs4_client_t *cp)
892 {
893 const char *kinet_ntop6(uchar_t *, char *, size_t);
894 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
895 struct sockaddr *ca;
896 uchar_t *b;
897
898 if (rfs4_ss_enabled == 0) {
899 return;
900 }
901
902 buf[0] = 0;
903
904 ca = (struct sockaddr *)&cp->rc_addr;
905
906 /*
907 * Convert the caller's IP address to a dotted string
908 */
909 if (ca->sa_family == AF_INET) {
910 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
911 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
912 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
913 } else if (ca->sa_family == AF_INET6) {
914 struct sockaddr_in6 *sin6;
915
916 sin6 = (struct sockaddr_in6 *)ca;
917 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
918 buf, INET6_ADDRSTRLEN);
919 }
920
921 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
922 (longlong_t)cp->rc_clientid);
923 rfs4_ss_clid_write(cp, leaf);
924 }
925
926 /*
927 * Place client information into stable storage: 2/3.
928 * DSS: distributed stable storage: the file may need to be written to
929 * multiple directories.
930 */
931 static void
932 rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
933 {
934 rfs4_servinst_t *sip;
935
936 /*
937 * It should be sufficient to write the leaf file to (all) DSS paths
938 * associated with just this client's instance. However, since our
939 * per-instance client grouping is solely temporal, HA-NFSv4 RG
940 * failover might result in us losing DSS data.
941 *
942 * Until the client grouping is improved, we must write the DSS data
943 * to all instances' paths. Start at the current instance, and
944 * walk the list backwards to the first.
945 */
946 mutex_enter(&rfs4_servinst_lock);
947 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
948 int i, npaths = sip->dss_npaths;
949
950 /* write the leaf file to all DSS paths */
951 for (i = 0; i < npaths; i++) {
952 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
953
954 /* HA-NFSv4 path might have been failed-away from us */
955 if (dss_path == NULL)
956 continue;
957
958 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
959 }
960 }
961 mutex_exit(&rfs4_servinst_lock);
962 }
963
964 /*
965 * Place client information into stable storage: 3/3.
966 * Write the stable storage data to the requested file.
967 */
968 static void
969 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
970 {
971 int ioflag;
972 int file_vers = NFS4_SS_VERSION;
973 size_t dirlen;
974 struct uio uio;
975 struct iovec iov[4];
976 char *dir;
977 rfs4_ss_pn_t *ss_pn;
978 vnode_t *vp;
979 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
980
981 /* allow 2 extra bytes for '/' & NUL */
1134 * for forced expiration
1135 */
1136 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1137 cp->rc_forced_expire = 1;
1138 }
1139 break;
1140
1141 default:
1142 /* force this assert to fail */
1143 ASSERT(clr->addr_type != clr->addr_type);
1144 }
1145 }
1146
/*
 * This is called from nfssys() in order to clear server state
 * for the specified client IP Address.
 *
 * Walks rfs4_client_tab with rfs4_client_scrub(), handing it `clr' as
 * the walk argument; the return value of rfs4_dbe_walk() is ignored.
 * rfs4_state_init() publishes this function through rfs4_client_clrst.
 */
void
rfs4_clear_client_state(struct nfs4clrst_args *clr)
{
	(void) rfs4_dbe_walk(rfs4_client_tab, rfs4_client_scrub, clr);
}
1156
/*
 * Used to initialize the NFSv4 server's state or database.  All of
 * the tables are created and timers are set.  Only called when NFSv4
 * service is provided.
 *
 * The whole routine runs with rfs4_state_lock held.  It returns at once
 * when rfs4_server_state is already non-NULL.  Otherwise it, in order:
 * initialises rfs4_findclient_lock, advances rfs4_start_time, creates a
 * server instance, registers the CPR callback, settles the cache
 * timers, creates the database with its tables and indices, initialises
 * stable storage and publishes rfs4_clear_client_state through
 * rfs4_client_clrst.
 *
 * NOTE(review): in each rfs4_table_create() call the integer following
 * the cache time equals the number of indices created on that table
 * below -- presumably an index count; confirm against
 * rfs4_table_create().
 */
void
rfs4_state_init()
{
	int start_grace;
	extern boolean_t rfs4_cpr_callb(void *, int);
	char *dss_path = NFS4_DSS_VAR_DIR;
	time_t start_time;

	mutex_enter(&rfs4_state_lock);

	/*
	 * If the server state database has already been initialized,
	 * skip it
	 */
	if (rfs4_server_state != NULL) {
		mutex_exit(&rfs4_state_lock);
		return;
	}

	rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);

	/*
	 * Set the boot time.  If the server
	 * has been restarted quickly and has had the opportunity to
	 * service clients, then the start_time needs to be bumped
	 * regardless.  A small window but it exists...
	 */
	start_time = gethrestime_sec();
	if (rfs4_start_time < start_time)
		rfs4_start_time = start_time;
	else
		rfs4_start_time++;

	/* DSS: distributed stable storage: initialise served paths list */
	rfs4_dss_pathlist = NULL;

	/*
	 * Create the first server instance, or a new one if the server has
	 * been restarted; see above comments on rfs4_start_time.  Don't
	 * start its grace period; that will be done later, to maximise the
	 * clients' recovery window.
	 */
	start_grace = 0;
	rfs4_servinst_create(start_grace, 1, &dss_path);

	/* reset the "first NFSv4 request" status */
	rfs4_seen_first_compound = 0;

	/*
	 * Add a CPR callback so that we can update client
	 * access times to extend the lease after a suspend
	 * and resume (using the same class as rpcmod/connmgr)
	 */
	cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");

	/*
	 * set the various cache timers for table creation; a timer that
	 * is already non-zero is left at its current value
	 */
	if (rfs4_client_cache_time == 0)
		rfs4_client_cache_time = CLIENT_CACHE_TIME;
	if (rfs4_openowner_cache_time == 0)
		rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
	if (rfs4_state_cache_time == 0)
		rfs4_state_cache_time = STATE_CACHE_TIME;
	if (rfs4_lo_state_cache_time == 0)
		rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
	if (rfs4_lockowner_cache_time == 0)
		rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
	if (rfs4_file_cache_time == 0)
		rfs4_file_cache_time = FILE_CACHE_TIME;
	if (rfs4_deleg_state_cache_time == 0)
		rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;

	/* Create the overall database to hold all server state */
	rfs4_server_state = rfs4_database_create(rfs4_database_debug);

	/* Now create the individual tables */

	/* Client table: indexed by nfs_client_id4 and by clientid */
	rfs4_client_cache_time *= rfs4_lease_time;
	rfs4_client_tab = rfs4_table_create(rfs4_server_state,
	    "Client",
	    rfs4_client_cache_time,
	    2,
	    rfs4_client_create,
	    rfs4_client_destroy,
	    rfs4_client_expiry,
	    sizeof (rfs4_client_t),
	    TABSIZE,
	    MAXTABSZ/8, 100);
	rfs4_nfsclnt_idx = rfs4_index_create(rfs4_client_tab,
	    "nfs_client_id4", nfsclnt_hash,
	    nfsclnt_compare, nfsclnt_mkkey,
	    TRUE);
	rfs4_clientid_idx = rfs4_index_create(rfs4_client_tab,
	    "client_id", clientid_hash,
	    clientid_compare, clientid_mkkey,
	    FALSE);

	/*
	 * Client IP table.  Unlike the other timers this one is assigned
	 * unconditionally and is not scaled by rfs4_lease_time.
	 */
	rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
	rfs4_clntip_tab = rfs4_table_create(rfs4_server_state,
	    "ClntIP",
	    rfs4_clntip_cache_time,
	    1,
	    rfs4_clntip_create,
	    rfs4_clntip_destroy,
	    rfs4_clntip_expiry,
	    sizeof (rfs4_clntip_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	rfs4_clntip_idx = rfs4_index_create(rfs4_clntip_tab,
	    "client_ip", clntip_hash,
	    clntip_compare, clntip_mkkey,
	    TRUE);

	/* Open owner table */
	rfs4_openowner_cache_time *= rfs4_lease_time;
	rfs4_openowner_tab = rfs4_table_create(rfs4_server_state,
	    "OpenOwner",
	    rfs4_openowner_cache_time,
	    1,
	    rfs4_openowner_create,
	    rfs4_openowner_destroy,
	    rfs4_openowner_expiry,
	    sizeof (rfs4_openowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	rfs4_openowner_idx = rfs4_index_create(rfs4_openowner_tab,
	    "open_owner4", openowner_hash,
	    openowner_compare,
	    openowner_mkkey, TRUE);

	/* Open state table: three indices */
	rfs4_state_cache_time *= rfs4_lease_time;
	rfs4_state_tab = rfs4_table_create(rfs4_server_state,
	    "OpenStateID",
	    rfs4_state_cache_time,
	    3,
	    rfs4_state_create,
	    rfs4_state_destroy,
	    rfs4_state_expiry,
	    sizeof (rfs4_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	rfs4_state_owner_file_idx = rfs4_index_create(rfs4_state_tab,
	    "Openowner-File",
	    state_owner_file_hash,
	    state_owner_file_compare,
	    state_owner_file_mkkey, TRUE);

	rfs4_state_idx = rfs4_index_create(rfs4_state_tab,
	    "State-id", state_hash,
	    state_compare, state_mkkey, FALSE);

	rfs4_state_file_idx = rfs4_index_create(rfs4_state_tab,
	    "File", state_file_hash,
	    state_file_compare, state_file_mkkey,
	    FALSE);

	/* Lock state table */
	rfs4_lo_state_cache_time *= rfs4_lease_time;
	rfs4_lo_state_tab = rfs4_table_create(rfs4_server_state,
	    "LockStateID",
	    rfs4_lo_state_cache_time,
	    2,
	    rfs4_lo_state_create,
	    rfs4_lo_state_destroy,
	    rfs4_lo_state_expiry,
	    sizeof (rfs4_lo_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	rfs4_lo_state_owner_idx = rfs4_index_create(rfs4_lo_state_tab,
	    "lockownerxstate",
	    lo_state_lo_hash,
	    lo_state_lo_compare,
	    lo_state_lo_mkkey, TRUE);

	rfs4_lo_state_idx = rfs4_index_create(rfs4_lo_state_tab,
	    "State-id",
	    lo_state_hash, lo_state_compare,
	    lo_state_mkkey, FALSE);

	/* Lock owner table */
	rfs4_lockowner_cache_time *= rfs4_lease_time;

	rfs4_lockowner_tab = rfs4_table_create(rfs4_server_state,
	    "Lockowner",
	    rfs4_lockowner_cache_time,
	    2,
	    rfs4_lockowner_create,
	    rfs4_lockowner_destroy,
	    rfs4_lockowner_expiry,
	    sizeof (rfs4_lockowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	rfs4_lockowner_idx = rfs4_index_create(rfs4_lockowner_tab,
	    "lock_owner4", lockowner_hash,
	    lockowner_compare,
	    lockowner_mkkey, TRUE);

	rfs4_lockowner_pid_idx = rfs4_index_create(rfs4_lockowner_tab,
	    "pid", pid_hash,
	    pid_compare, pid_mkkey,
	    FALSE);

	/*
	 * File table: the only table created with a NULL expiry callback
	 * and with -1 as its final argument.
	 */
	rfs4_file_cache_time *= rfs4_lease_time;
	rfs4_file_tab = rfs4_table_create(rfs4_server_state,
	    "File",
	    rfs4_file_cache_time,
	    1,
	    rfs4_file_create,
	    rfs4_file_destroy,
	    NULL,
	    sizeof (rfs4_file_t),
	    TABSIZE,
	    MAXTABSZ, -1);

	rfs4_file_idx = rfs4_index_create(rfs4_file_tab,
	    "Filehandle", file_hash,
	    file_compare, file_mkkey, TRUE);

	/* Delegation state table */
	rfs4_deleg_state_cache_time *= rfs4_lease_time;
	rfs4_deleg_state_tab = rfs4_table_create(rfs4_server_state,
	    "DelegStateID",
	    rfs4_deleg_state_cache_time,
	    2,
	    rfs4_deleg_state_create,
	    rfs4_deleg_state_destroy,
	    rfs4_deleg_state_expiry,
	    sizeof (rfs4_deleg_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	rfs4_deleg_idx = rfs4_index_create(rfs4_deleg_state_tab,
	    "DelegByFileClient",
	    deleg_hash,
	    deleg_compare,
	    deleg_mkkey, TRUE);

	rfs4_deleg_state_idx = rfs4_index_create(rfs4_deleg_state_tab,
	    "DelegState",
	    deleg_state_hash,
	    deleg_state_compare,
	    deleg_state_mkkey, FALSE);

	/*
	 * Init the stable storage.
	 */
	rfs4_ss_init();

	/* publish the clear-state entry point (see rfs4_clear_client_state) */
	rfs4_client_clrst = rfs4_clear_client_state;

	mutex_exit(&rfs4_state_lock);
}
1410
1411
1412 /*
1413 * Used at server shutdown to cleanup all of the NFSv4 server's structures
1414 * and other state.
1415 */
1416 void
1417 rfs4_state_fini()
1418 {
1419 rfs4_database_t *dbp;
1420
1421 mutex_enter(&rfs4_state_lock);
1422
1423 if (rfs4_server_state == NULL) {
1424 mutex_exit(&rfs4_state_lock);
1425 return;
1426 }
1427
1428 rfs4_client_clrst = NULL;
1429
1430 rfs4_set_deleg_policy(SRV_NEVER_DELEGATE);
1431 dbp = rfs4_server_state;
1432 rfs4_server_state = NULL;
1433
1434 /*
1435 * Cleanup the CPR callback.
1436 */
1437 if (cpr_id)
1438 (void) callb_delete(cpr_id);
1439
1440 rw_destroy(&rfs4_findclient_lock);
1441
1442 /* First stop all of the reaper threads in the database */
1443 rfs4_database_shutdown(dbp);
1444 /* clean up any dangling stable storage structures */
1445 rfs4_ss_fini();
1446 /* Now actually destroy/release the database and its tables */
1447 rfs4_database_destroy(dbp);
1448
1449 /* Reset the cache timers for next time */
1450 rfs4_client_cache_time = 0;
1451 rfs4_openowner_cache_time = 0;
1452 rfs4_state_cache_time = 0;
1453 rfs4_lo_state_cache_time = 0;
1454 rfs4_lockowner_cache_time = 0;
1455 rfs4_file_cache_time = 0;
1456 rfs4_deleg_state_cache_time = 0;
1457
1458 mutex_exit(&rfs4_state_lock);
1459
1460 /* destroy server instances and current instance ptr */
1461 rfs4_servinst_destroy_all();
1462
1463 /* reset the "first NFSv4 request" status */
1464 rfs4_seen_first_compound = 0;
1465
1466 /* DSS: distributed stable storage */
1467 nvlist_free(rfs4_dss_oldpaths);
1468 nvlist_free(rfs4_dss_paths);
1469 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1470 }
1471
/*
 * Overlay of a clientid4 with the server's two 32-bit components.
 * rfs4_client_create() fills start_time from rfs4_start_time and c_id
 * from rfs4_dbe_getid().
 */
typedef union {
	struct {
		uint32_t start_time;
		uint32_t c_id;
	} impl_id;
	clientid4 id4;
} cid;
1479
1480 static int foreign_stateid(stateid_t *id);
1481 static int foreign_clientid(cid *cidp);
1482 static void embed_nodeid(cid *cidp);
1483
1484 typedef union {
1485 struct {
1486 uint32_t c_id;
1487 uint32_t gen_num;
1488 } cv_impl;
1489 verifier4 confirm_verf;
1564 * If the sysadmin has used clear_locks for this
1565 * entry then forced_expire will be set and we
1566 * want this entry to be reaped. Or the entry
1567 * has exceeded its lease period.
1568 */
1569 cp_expired = (cp->rc_forced_expire ||
1570 (gethrestime_sec() - cp->rc_last_access
1571 > rfs4_lease_time));
1572
1573 if (!cp->rc_ss_remove && cp_expired)
1574 cp->rc_ss_remove = 1;
1575 return (cp_expired);
1576 }
1577
1578 /*
1579 * Remove the leaf file from all distributed stable storage paths.
1580 */
1581 static void
1582 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1583 {
1584 rfs4_servinst_t *sip;
1585 char *leaf = cp->rc_ss_pn->leaf;
1586
1587 /*
1588 * since the state files are written to all DSS
1589 * paths we must remove this leaf file instance
1590 * from all server instances.
1591 */
1592
1593 mutex_enter(&rfs4_servinst_lock);
1594 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1595 /* remove the leaf file associated with this server instance */
1596 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1597 }
1598 mutex_exit(&rfs4_servinst_lock);
1599 }
1600
1601 static void
1602 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1603 {
1604 int i, npaths = sip->dss_npaths;
1605
1606 for (i = 0; i < npaths; i++) {
1607 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1608 char *path, *dir;
1609 size_t pathlen;
1610
1611 /* the HA-NFSv4 path might have been failed-over away from us */
1612 if (dss_path == NULL)
1613 continue;
1614
1615 dir = dss_path->path;
1616
1617 /* allow 3 extra bytes for two '/' & a NUL */
1618 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1646 if (cp->rc_ss_remove)
1647 rfs4_dss_remove_cpleaf(cp);
1648 rfs4_ss_pnfree(cp->rc_ss_pn);
1649 }
1650
1651 /* Free the client supplied client id */
1652 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1653
1654 if (cp->rc_sysidt != LM_NOSYSID)
1655 lm_free_sysidt(cp->rc_sysidt);
1656 }
1657
1658 static bool_t
1659 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1660 {
1661 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1662 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1663 struct sockaddr *ca;
1664 cid *cidp;
1665 scid_confirm_verf *scvp;
1666
1667 /* Get a clientid to give to the client */
1668 cidp = (cid *)&cp->rc_clientid;
1669 cidp->impl_id.start_time = rfs4_start_time;
1670 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1671
1672 /* If we are booted as a cluster node, embed our nodeid */
1673 if (cluster_bootflags & CLUSTER_BOOTED)
1674 embed_nodeid(cidp);
1675
1676 /* Allocate and copy client's client id value */
1677 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1678 cp->rc_nfs_client.id_len = client->id_len;
1679 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1680 cp->rc_nfs_client.verifier = client->verifier;
1681
1682 /* Copy client's IP address */
1683 ca = client->cl_addr;
1684 if (ca->sa_family == AF_INET)
1685 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1686 else if (ca->sa_family == AF_INET6)
1687 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1688 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1689
1707
1708 cp->rc_cr_set = NULL;
1709
1710 cp->rc_sysidt = LM_NOSYSID;
1711
1712 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1713 offsetof(rfs4_openowner_t, ro_node));
1714
1715 /* set up the callback control structure */
1716 cp->rc_cbinfo.cb_state = CB_UNINIT;
1717 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1718 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1719 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1720
1721 /*
1722 * Associate the client_t with the current server instance.
1723 * The hold is solely to satisfy the calling requirement of
1724 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1725 */
1726 rfs4_dbe_hold(cp->rc_dbe);
1727 rfs4_servinst_assign(cp, rfs4_cur_servinst);
1728 rfs4_dbe_rele(cp->rc_dbe);
1729
1730 return (TRUE);
1731 }
1732
1733 /*
1734 * Caller wants to generate/update the setclientid_confirm verifier
1735 * associated with a client. This is done during the SETCLIENTID
1736 * processing.
1737 */
1738 void
1739 rfs4_client_scv_next(rfs4_client_t *cp)
1740 {
1741 scid_confirm_verf *scvp;
1742
1743 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1744 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1745 scvp->cv_impl.gen_num++;
1746 }
1747
/*
 * Release a reference on a client by calling rfs4_dbe_rele() on its
 * database entry (cp->rc_dbe).
 */
void
rfs4_client_rele(rfs4_client_t *cp)
{
	rfs4_dbe_rele(cp->rc_dbe);
}
1753
1754 rfs4_client_t *
1755 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1756 {
1757 rfs4_client_t *cp;
1758
1759
1760 if (oldcp) {
1761 rw_enter(&rfs4_findclient_lock, RW_WRITER);
1762 rfs4_dbe_hide(oldcp->rc_dbe);
1763 } else {
1764 rw_enter(&rfs4_findclient_lock, RW_READER);
1765 }
1766
1767 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_nfsclnt_idx, client,
1768 create, (void *)client, RFS4_DBS_VALID);
1769
1770 if (oldcp)
1771 rfs4_dbe_unhide(oldcp->rc_dbe);
1772
1773 rw_exit(&rfs4_findclient_lock);
1774
1775 return (cp);
1776 }
1777
1778 rfs4_client_t *
1779 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1780 {
1781 rfs4_client_t *cp;
1782 bool_t create = FALSE;
1783 cid *cidp = (cid *)&clientid;
1784
1785 /* If we're a cluster and the nodeid isn't right, short-circuit */
1786 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1787 return (NULL);
1788
1789 rw_enter(&rfs4_findclient_lock, RW_READER);
1790
1791 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, &clientid,
1792 &create, NULL, RFS4_DBS_VALID);
1793
1794 rw_exit(&rfs4_findclient_lock);
1795
1796 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1797 rfs4_client_rele(cp);
1798 return (NULL);
1799 } else {
1800 return (cp);
1801 }
1802 }
1803
1804 static uint32_t
1805 clntip_hash(void *key)
1806 {
1807 struct sockaddr *addr = key;
1808 int i, len = 0;
1809 uint32_t hash = 0;
1810 char *ptr;
1811
1812 if (addr->sa_family == AF_INET) {
1813 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1814 len = sizeof (struct in_addr);
1882 {
1883 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1884 struct sockaddr *ca = (struct sockaddr *)arg;
1885
1886 /* Copy client's IP address */
1887 if (ca->sa_family == AF_INET)
1888 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1889 else if (ca->sa_family == AF_INET6)
1890 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1891 else
1892 return (FALSE);
1893 cp->ri_no_referrals = 1;
1894
1895 return (TRUE);
1896 }
1897
1898 rfs4_clntip_t *
1899 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1900 {
1901 rfs4_clntip_t *cp;
1902
1903 rw_enter(&rfs4_findclient_lock, RW_READER);
1904
1905 cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
1906 create, addr, RFS4_DBS_VALID);
1907
1908 rw_exit(&rfs4_findclient_lock);
1909
1910 return (cp);
1911 }
1912
1913 void
1914 rfs4_invalidate_clntip(struct sockaddr *addr)
1915 {
1916 rfs4_clntip_t *cp;
1917 bool_t create = FALSE;
1918
1919 rw_enter(&rfs4_findclient_lock, RW_READER);
1920
1921 cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
1922 &create, NULL, RFS4_DBS_VALID);
1923 if (cp == NULL) {
1924 rw_exit(&rfs4_findclient_lock);
1925 return;
1926 }
1927 rfs4_dbe_invalidate(cp->ri_dbe);
1928 rfs4_dbe_rele(cp->ri_dbe);
1929
1930 rw_exit(&rfs4_findclient_lock);
1931 }
1932
1933 bool_t
1934 rfs4_lease_expired(rfs4_client_t *cp)
1935 {
1936 bool_t rc;
1937
1938 rfs4_dbe_lock(cp->rc_dbe);
1939
1940 /*
1941 * If the admin has executed clear_locks for this
1942 * client id, force expire will be set, so no need
1943 * to calculate anything because it's "outa here".
1944 */
1945 if (cp->rc_forced_expire) {
1946 rc = TRUE;
1947 } else {
1948 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
1949 }
1950
2058
2059 /* Free the lock owner id */
2060 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2061 }
2062
2063 void
2064 rfs4_openowner_rele(rfs4_openowner_t *oo)
2065 {
2066 rfs4_dbe_rele(oo->ro_dbe);
2067 }
2068
2069 static bool_t
2070 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2071 {
2072 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2073 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2074 open_owner4 *openowner = &argp->ro_owner;
2075 seqid4 seqid = argp->ro_open_seqid;
2076 rfs4_client_t *cp;
2077 bool_t create = FALSE;
2078
2079 rw_enter(&rfs4_findclient_lock, RW_READER);
2080
2081 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2082 &openowner->clientid,
2083 &create, NULL, RFS4_DBS_VALID);
2084
2085 rw_exit(&rfs4_findclient_lock);
2086
2087 if (cp == NULL)
2088 return (FALSE);
2089
2090 oo->ro_reply_fh.nfs_fh4_len = 0;
2091 oo->ro_reply_fh.nfs_fh4_val = NULL;
2092
2093 oo->ro_owner.clientid = openowner->clientid;
2094 oo->ro_owner.owner_val =
2095 kmem_alloc(openowner->owner_len, KM_SLEEP);
2096
2097 bcopy(openowner->owner_val,
2098 oo->ro_owner.owner_val, openowner->owner_len);
2099
2100 oo->ro_owner.owner_len = openowner->owner_len;
2101
2102 oo->ro_need_confirm = TRUE;
2103
2104 rfs4_sw_init(&oo->ro_sw);
2105
2107 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2108 oo->ro_client = cp;
2109 oo->ro_cr_set = NULL;
2110
2111 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2112 offsetof(rfs4_state_t, rs_node));
2113
2114 /* Insert openowner into client's open owner list */
2115 rfs4_dbe_lock(cp->rc_dbe);
2116 list_insert_tail(&cp->rc_openownerlist, oo);
2117 rfs4_dbe_unlock(cp->rc_dbe);
2118
2119 return (TRUE);
2120 }
2121
2122 rfs4_openowner_t *
2123 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2124 {
2125 rfs4_openowner_t *oo;
2126 rfs4_openowner_t arg;
2127
2128 arg.ro_owner = *openowner;
2129 arg.ro_open_seqid = seqid;
2130 oo = (rfs4_openowner_t *)rfs4_dbsearch(rfs4_openowner_idx, openowner,
2131 create, &arg, RFS4_DBS_VALID);
2132
2133 return (oo);
2134 }
2135
2136 void
2137 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2138 {
2139
2140 rfs4_dbe_lock(oo->ro_dbe);
2141
2142 oo->ro_open_seqid++;
2143
2144 rfs4_dbe_unlock(oo->ro_dbe);
2145 }
2146
2147 void
2148 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2149 {
2150
2253 }
2254
2255 /* ARGSUSED */
2256 static bool_t
2257 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2258 {
2259 /*
2260 * Since expiry is called with no other references on
2261 * this struct, go ahead and have it removed.
2262 */
2263 return (TRUE);
2264 }
2265
2266 static bool_t
2267 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2268 {
2269 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2270 lock_owner4 *lockowner = (lock_owner4 *)arg;
2271 rfs4_client_t *cp;
2272 bool_t create = FALSE;
2273
2274 rw_enter(&rfs4_findclient_lock, RW_READER);
2275
2276 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2277 &lockowner->clientid,
2278 &create, NULL, RFS4_DBS_VALID);
2279
2280 rw_exit(&rfs4_findclient_lock);
2281
2282 if (cp == NULL)
2283 return (FALSE);
2284
2285 /* Reference client */
2286 lo->rl_client = cp;
2287 lo->rl_owner.clientid = lockowner->clientid;
2288 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2289 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2290 lockowner->owner_len);
2291 lo->rl_owner.owner_len = lockowner->owner_len;
2292 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2293
2294 return (TRUE);
2295 }
2296
2297 rfs4_lockowner_t *
2298 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2299 {
2300 rfs4_lockowner_t *lo;
2301
2302 lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_idx, lockowner,
2303 create, lockowner, RFS4_DBS_VALID);
2304
2305 return (lo);
2306 }
2307
2308 rfs4_lockowner_t *
2309 rfs4_findlockowner_by_pid(pid_t pid)
2310 {
2311 rfs4_lockowner_t *lo;
2312 bool_t create = FALSE;
2313
2314 lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_pid_idx,
2315 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2316
2317 return (lo);
2318 }
2319
2320
2321 static uint32_t
2322 file_hash(void *key)
2323 {
2324 return (ADDRHASH(key));
2325 }
2326
2327 static void *
2328 file_mkkey(rfs4_entry_t u_entry)
2329 {
2330 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2331
2332 return (fp->rf_vp);
2333 }
2334
2405
2406 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2407 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2408
2409 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2410
2411 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2412
2413 mutex_enter(&vp->v_vsd_lock);
2414 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2415 mutex_exit(&vp->v_vsd_lock);
2416
2417 return (TRUE);
2418 }
2419
2420 rfs4_file_t *
2421 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2422 {
2423 rfs4_file_t *fp;
2424 rfs4_fcreate_arg arg;
2425
2426 arg.vp = vp;
2427 arg.fh = fh;
2428
2429 if (*create == TRUE)
2430 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
2431 &arg, RFS4_DBS_VALID);
2432 else {
2433 mutex_enter(&vp->v_vsd_lock);
2434 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2435 if (fp) {
2436 rfs4_dbe_lock(fp->rf_dbe);
2437 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2438 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2439 rfs4_dbe_unlock(fp->rf_dbe);
2440 fp = NULL;
2441 } else {
2442 rfs4_dbe_hold(fp->rf_dbe);
2443 rfs4_dbe_unlock(fp->rf_dbe);
2444 }
2445 }
2446 mutex_exit(&vp->v_vsd_lock);
2447 }
2448 return (fp);
2449 }
2450
2451 /*
2452 * Find a file in the db and once it is located, take the rw lock.
2453 * Need to check the vnode pointer and if it does not exist (it was
2454 * removed between the db location and check) redo the find. This
2455 * assumes that a file struct that has a NULL vnode pointer is marked
2456 * at 'invalid' and will not be found in the db the second time
2457 * around.
2458 */
2459 rfs4_file_t *
2460 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2461 {
2462 rfs4_file_t *fp;
2463 rfs4_fcreate_arg arg;
2464 bool_t screate = *create;
2465
2466 if (screate == FALSE) {
2467 mutex_enter(&vp->v_vsd_lock);
2468 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2469 if (fp) {
2470 rfs4_dbe_lock(fp->rf_dbe);
2471 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2472 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2473 rfs4_dbe_unlock(fp->rf_dbe);
2474 mutex_exit(&vp->v_vsd_lock);
2475 fp = NULL;
2476 } else {
2477 rfs4_dbe_hold(fp->rf_dbe);
2478 rfs4_dbe_unlock(fp->rf_dbe);
2479 mutex_exit(&vp->v_vsd_lock);
2480 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2481 if (fp->rf_vp == NULL) {
2482 rw_exit(&fp->rf_file_rwlock);
2483 rfs4_file_rele(fp);
2484 fp = NULL;
2485 }
2486 }
2487 } else {
2488 mutex_exit(&vp->v_vsd_lock);
2489 }
2490 } else {
2491 retry:
2492 arg.vp = vp;
2493 arg.fh = fh;
2494
2495 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
2496 &arg, RFS4_DBS_VALID);
2497 if (fp != NULL) {
2498 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2499 if (fp->rf_vp == NULL) {
2500 rw_exit(&fp->rf_file_rwlock);
2501 rfs4_file_rele(fp);
2502 *create = screate;
2503 goto retry;
2504 }
2505 }
2506 }
2507
2508 return (fp);
2509 }
2510
2511 static uint32_t
2512 lo_state_hash(void *key)
2513 {
2514 stateid_t *id = key;
2515
2516 return (id->bits.ident+id->bits.pid);
2631 list_insert_tail(&sp->rs_lostatelist, lsp);
2632 rfs4_dbe_hold(sp->rs_dbe);
2633 rfs4_dbe_unlock(sp->rs_dbe);
2634
2635 return (TRUE);
2636 }
2637
2638 void
2639 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2640 {
2641 if (unlock_fp == TRUE)
2642 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2643 rfs4_dbe_rele(lsp->rls_dbe);
2644 }
2645
2646 static rfs4_lo_state_t *
2647 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2648 {
2649 rfs4_lo_state_t *lsp;
2650 bool_t create = FALSE;
2651
2652 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_idx, id,
2653 &create, NULL, RFS4_DBS_VALID);
2654 if (lock_fp == TRUE && lsp != NULL)
2655 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2656
2657 return (lsp);
2658 }
2659
2660
2661 static uint32_t
2662 lo_state_lo_hash(void *key)
2663 {
2664 rfs4_lo_state_t *lsp = key;
2665
2666 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2667 }
2668
2669 static bool_t
2670 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2671 {
2672 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2673 rfs4_lo_state_t *keyp = key;
2674
2675 return (keyp->rls_locker == lsp->rls_locker &&
2676 keyp->rls_state == lsp->rls_state);
2677 }
2678
2679 static void *
2680 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2681 {
2682 return (u_entry);
2683 }
2684
2685 rfs4_lo_state_t *
2686 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2687 bool_t *create)
2688 {
2689 rfs4_lo_state_t *lsp;
2690 rfs4_lo_state_t arg;
2691
2692 arg.rls_locker = lo;
2693 arg.rls_state = sp;
2694
2695 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_owner_idx, &arg,
2696 create, &arg, RFS4_DBS_VALID);
2697
2698 return (lsp);
2699 }
2700
2701 static stateid_t
2702 get_stateid(id_t eid)
2703 {
2704 stateid_t id;
2705
2706 id.bits.boottime = rfs4_start_time;
2707 id.bits.ident = eid;
2708 id.bits.chgseq = 0;
2709 id.bits.type = 0;
2710 id.bits.pid = 0;
2711
2712 /*
2713 * If we are booted as a cluster node, embed our nodeid.
2714 * We've already done sanity checks in rfs4_client_create() so no
2715 * need to repeat them here.
2716 */
2717 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2718 clconf_get_nodeid() : 0;
2719
2720 return (id);
2721 }
2722
2723 /*
2724 * For use only when booted as a cluster node.
2725 * Returns TRUE if the embedded nodeid indicates that this stateid was
2726 * generated on another node.
2942 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
2943 {
2944 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2945
2946 /* return delegation if necessary */
2947 rfs4_return_deleg(dsp, FALSE);
2948
	/* We're done with the file */
2950 rfs4_file_rele(dsp->rds_finfo);
2951 dsp->rds_finfo = NULL;
2952
2953 /* And now with the openowner */
2954 rfs4_client_rele(dsp->rds_client);
2955 dsp->rds_client = NULL;
2956 }
2957
2958 rfs4_deleg_state_t *
2959 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
2960 {
2961 rfs4_deleg_state_t ds, *dsp;
2962
2963 ds.rds_client = sp->rs_owner->ro_client;
2964 ds.rds_finfo = sp->rs_finfo;
2965
2966 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_idx, &ds,
2967 create, &ds, RFS4_DBS_VALID);
2968
2969 return (dsp);
2970 }
2971
2972 rfs4_deleg_state_t *
2973 rfs4_finddelegstate(stateid_t *id)
2974 {
2975 rfs4_deleg_state_t *dsp;
2976 bool_t create = FALSE;
2977
2978 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_state_idx, id,
2979 &create, NULL, RFS4_DBS_VALID);
2980
2981 return (dsp);
2982 }
2983
2984 void
2985 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
2986 {
2987 rfs4_dbe_rele(dsp->rds_dbe);
2988 }
2989
2990 void
2991 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
2992 {
2993
2994 rfs4_dbe_lock(lsp->rls_dbe);
2995
2996 /*
2997 * If we are skipping sequence id checking, this means that
2998 * this is the first lock request and therefore the sequence
2999 * id does not need to be updated. This only happens on the
3078 if (sp->rs_closed == TRUE)
3079 return (FALSE);
3080
3081 return (fp == sp->rs_finfo);
3082 }
3083
3084 static void *
3085 state_file_mkkey(rfs4_entry_t u_entry)
3086 {
3087 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3088
3089 return (sp->rs_finfo);
3090 }
3091
3092 rfs4_state_t *
3093 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3094 bool_t *create)
3095 {
3096 rfs4_state_t *sp;
3097 rfs4_state_t key;
3098
3099 key.rs_owner = oo;
3100 key.rs_finfo = fp;
3101
3102 sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_owner_file_idx, &key,
3103 create, &key, RFS4_DBS_VALID);
3104
3105 return (sp);
3106 }
3107
3108 /* This returns ANY state struct that refers to this file */
3109 static rfs4_state_t *
3110 rfs4_findstate_by_file(rfs4_file_t *fp)
3111 {
3112 bool_t create = FALSE;
3113
3114 return ((rfs4_state_t *)rfs4_dbsearch(rfs4_state_file_idx, fp,
3115 &create, fp, RFS4_DBS_VALID));
3116 }
3117
3118 static bool_t
3119 rfs4_state_expiry(rfs4_entry_t u_entry)
3120 {
3121 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3122
3123 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3124 return (TRUE);
3125
3126 if (sp->rs_closed == TRUE &&
3127 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3128 > rfs4_lease_time))
3129 return (TRUE);
3130
3131 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3132 > rfs4_lease_time));
3133 }
3134
3145 sp->rs_stateid.bits.type = OPENID;
3146 sp->rs_owner = oo;
3147 sp->rs_finfo = fp;
3148
3149 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3150 offsetof(rfs4_lo_state_t, rls_node));
3151
3152 /* Insert state on per open owner's list */
3153 rfs4_dbe_lock(oo->ro_dbe);
3154 list_insert_tail(&oo->ro_statelist, sp);
3155 rfs4_dbe_unlock(oo->ro_dbe);
3156
3157 return (TRUE);
3158 }
3159
3160 static rfs4_state_t *
3161 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3162 {
3163 rfs4_state_t *sp;
3164 bool_t create = FALSE;
3165
3166 sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_idx, id,
3167 &create, NULL, find_invalid);
3168 if (lock_fp == TRUE && sp != NULL)
3169 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3170
3171 return (sp);
3172 }
3173
3174 void
3175 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3176 cred_t *cr)
3177 {
3178 /* Remove the associated lo_state owners */
3179 if (!lock_held)
3180 rfs4_dbe_lock(sp->rs_dbe);
3181
3182 /*
3183 * If refcnt == 0, the dbe is about to be destroyed.
3184 * lock state will be released by the reaper thread.
3185 */
3186
3214 }
3215
3216 void
3217 rfs4_client_close(rfs4_client_t *cp)
3218 {
3219 /* Mark client as going away. */
3220 rfs4_dbe_lock(cp->rc_dbe);
3221 rfs4_dbe_invalidate(cp->rc_dbe);
3222 rfs4_dbe_unlock(cp->rc_dbe);
3223
3224 rfs4_client_state_remove(cp);
3225
3226 /* Release the client */
3227 rfs4_client_rele(cp);
3228 }
3229
3230 nfsstat4
3231 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3232 {
3233 cid *cidp = (cid *) cp;
3234
3235 /*
3236 * If we are booted as a cluster node, check the embedded nodeid.
3237 * If it indicates that this clientid was generated on another node,
3238 * inform the client accordingly.
3239 */
3240 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3241 return (NFS4ERR_STALE_CLIENTID);
3242
3243 /*
3244 * If the server start time matches the time provided
3245 * by the client (via the clientid) and this is NOT a
3246 * setclientid_confirm then return EXPIRED.
3247 */
3248 if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time)
3249 return (NFS4ERR_EXPIRED);
3250
3251 return (NFS4ERR_STALE_CLIENTID);
3252 }
3253
3254 /*
3255 * This is used when a stateid has not been found amongst the
3256 * current server's state. Check the stateid to see if it
3257 * was from this server instantiation or not.
3258 */
3259 static nfsstat4
3260 what_stateid_error(stateid_t *id, stateid_type_t type)
3261 {
3262 /* If we are booted as a cluster node, was stateid locally generated? */
3263 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3264 return (NFS4ERR_STALE_STATEID);
3265
3266 /* If types don't match then no use checking further */
3267 if (type != id->bits.type)
3268 return (NFS4ERR_BAD_STATEID);
3269
3270 /* From a different server instantiation, return STALE */
3271 if (id->bits.boottime != rfs4_start_time)
3272 return (NFS4ERR_STALE_STATEID);
3273
3274 /*
3275 * From this server but the state is most likely beyond lease
3276 * timeout: return NFS4ERR_EXPIRED. However, there is the
3277 * case of a delegation stateid. For delegations, there is a
3278 * case where the state can be removed without the client's
3279 * knowledge/consent: revocation. In the case of delegation
3280 * revocation, the delegation state will be removed and will
3281 * not be found. If the client does something like a
3282 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3283 * that has been revoked, the server should return BAD_STATEID
3284 * instead of the more common EXPIRED error.
3285 */
3286 if (id->bits.boottime == rfs4_start_time) {
3287 if (type == DELEGID)
3288 return (NFS4ERR_BAD_STATEID);
3289 else
3290 return (NFS4ERR_EXPIRED);
3291 }
3292
3293 return (NFS4ERR_BAD_STATEID);
3294 }
3295
3296 /*
3297 * Used later on to find the various state structs. When called from
3298 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3299 * taken (it is not needed) and helps on the read/write path with
3300 * respect to performance.
3301 */
3302 static nfsstat4
3303 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3304 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3305 {
3306 stateid_t *id = (stateid_t *)stateid;
3768
3769 /*
3770 * This is a special function in that for the file struct provided the
3771 * server wants to remove/close all current state associated with the
3772 * file. The prime use of this would be with OP_REMOVE to force the
3773 * release of state and particularly of file locks.
3774 *
3775 * There is an assumption that there is no delegations outstanding on
3776 * this file at this point. The caller should have waited for those
3777 * to be returned or revoked.
3778 */
3779 void
3780 rfs4_close_all_state(rfs4_file_t *fp)
3781 {
3782 rfs4_state_t *sp;
3783
3784 rfs4_dbe_lock(fp->rf_dbe);
3785
3786 #ifdef DEBUG
3787 /* only applies when server is handing out delegations */
3788 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE)
3789 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3790 #endif
3791
3792 /* No delegations for this file */
3793 ASSERT(list_is_empty(&fp->rf_delegstatelist));
3794
3795 /* Make sure that it can not be found */
3796 rfs4_dbe_invalidate(fp->rf_dbe);
3797
3798 if (fp->rf_vp == NULL) {
3799 rfs4_dbe_unlock(fp->rf_dbe);
3800 return;
3801 }
3802 rfs4_dbe_unlock(fp->rf_dbe);
3803
3804 /*
3805 * Hold as writer to prevent other server threads from
3806 * processing requests related to the file while all state is
3807 * being removed.
3808 */
3974 OPEN_DELEGATE_WRITE) {
3975 (void) fem_uninstall(vp, deleg_wrops,
3976 (void *)fp);
3977 vn_open_downgrade(vp, FREAD|FWRITE);
3978 }
3979 mutex_enter(&vp->v_vsd_lock);
3980 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
3981 mutex_exit(&vp->v_vsd_lock);
3982 VN_RELE(vp);
3983 fp->rf_vp = NULL;
3984 }
3985 rfs4_dbe_invalidate(fp->rf_dbe);
3986 }
3987 }
3988
3989 /*
3990 * Given a directory that is being unexported, cleanup/release all
3991 * state in the server that refers to objects residing underneath this
3992 * particular export. The ordering of the release is important.
3993 * Lock_owner, then state and then file.
3994 */
3995 void
3996 rfs4_clean_state_exi(struct exportinfo *exi)
3997 {
3998 mutex_enter(&rfs4_state_lock);
3999
4000 if (rfs4_server_state == NULL) {
4001 mutex_exit(&rfs4_state_lock);
4002 return;
4003 }
4004
4005 rfs4_dbe_walk(rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4006 rfs4_dbe_walk(rfs4_state_tab, rfs4_state_walk_callout, exi);
4007 rfs4_dbe_walk(rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4008 rfs4_dbe_walk(rfs4_file_tab, rfs4_file_walk_callout, exi);
4009
4010 mutex_exit(&rfs4_state_lock);
4011 }
|
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2018 Nexenta Systems, Inc.
28 * Copyright 2019 Nexenta by DDN, Inc.
29 */
30
31 #include <sys/systm.h>
32 #include <sys/kmem.h>
33 #include <sys/cmn_err.h>
34 #include <sys/atomic.h>
35 #include <sys/clconf.h>
36 #include <sys/cladm.h>
37 #include <sys/flock.h>
38 #include <nfs/export.h>
39 #include <nfs/nfs.h>
40 #include <nfs/nfs4.h>
41 #include <nfs/nfssys.h>
42 #include <nfs/lm.h>
43 #include <sys/pathname.h>
44 #include <sys/sdt.h>
45 #include <sys/nvpair.h>
46
47 extern u_longlong_t nfs4_srv_caller_id;
48
49 extern uint_t nfs4_srv_vkey;
50
51 stateid4 special0 = {
52 0,
53 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
54 };
55
56 stateid4 special1 = {
57 0xffffffff,
58 {
59 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
60 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
61 (char)0xff, (char)0xff, (char)0xff, (char)0xff
62 }
63 };
64
65
66 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \
67 stateid4_cmp(id, &special1))
68
69 /* For embedding the cluster nodeid into our clientid */
70 #define CLUSTER_NODEID_SHIFT 24
71 #define CLUSTER_MAX_NODEID 255
72
73 #ifdef DEBUG
74 int rfs4_debug;
75 #endif
76
77 static uint32_t rfs4_database_debug = 0x00;
78
79 /* CSTYLED */
80 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
81 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
82 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
83 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
84
85 /*
86 * Couple of simple init/destroy functions for a general waiter
87 */
88 void
89 rfs4_sw_init(rfs4_state_wait_t *swp)
90 {
91 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
92 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
93 swp->sw_active = FALSE;
94 swp->sw_wait_count = 0;
95 }
96
97 void
98 rfs4_sw_destroy(rfs4_state_wait_t *swp)
99 {
100 mutex_destroy(swp->sw_cv_lock);
109 swp->sw_wait_count++;
110 cv_wait(swp->sw_cv, swp->sw_cv_lock);
111 swp->sw_wait_count--;
112 }
113 ASSERT(swp->sw_active == FALSE);
114 swp->sw_active = TRUE;
115 mutex_exit(swp->sw_cv_lock);
116 }
117
118 void
119 rfs4_sw_exit(rfs4_state_wait_t *swp)
120 {
121 mutex_enter(swp->sw_cv_lock);
122 ASSERT(swp->sw_active == TRUE);
123 swp->sw_active = FALSE;
124 if (swp->sw_wait_count != 0)
125 cv_broadcast(swp->sw_cv);
126 mutex_exit(swp->sw_cv_lock);
127 }
128
129 static void
130 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
131 {
132 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
133 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
134
135 if (sres->status == NFS4ERR_DENIED) {
136 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
137 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
138 }
139 }
140
141 /*
142 * CPR callback id -- not related to v4 callbacks
143 */
144 static callb_id_t cpr_id = 0;
145
146 static void
147 deep_lock_free(LOCK4res *res)
148 {
149 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
150
151 if (res->status == NFS4ERR_DENIED)
152 kmem_free(lo->owner_val, lo->owner_len);
153 }
154
155 static void
156 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
157 {
158 nfsace4 *sacep, *dacep;
159
160 if (sres->status != NFS4_OK) {
161 return;
162 }
163
164 dres->attrset = sres->attrset;
165
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizeable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
 * correctly (I hope) and will not change by much. The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
272
273 #ifdef DEBUG
274 #define TABSIZE 17
275 #else
276 #define TABSIZE 2047
277 #endif
278
279 #define ADDRHASH(key) ((unsigned long)(key) >> 3)
280
/* Parenthesized so the value survives use inside larger expressions */
#define	MAXTABSZ (1024 * 1024)
282
283 /* The values below are rfs4_lease_time units */
284
285 #ifdef DEBUG
286 #define CLIENT_CACHE_TIME 1
287 #define OPENOWNER_CACHE_TIME 1
288 #define STATE_CACHE_TIME 1
289 #define LO_STATE_CACHE_TIME 1
290 #define LOCKOWNER_CACHE_TIME 1
291 #define FILE_CACHE_TIME 3
292 #define DELEG_STATE_CACHE_TIME 1
293 #else
294 #define CLIENT_CACHE_TIME 10
295 #define OPENOWNER_CACHE_TIME 5
296 #define STATE_CACHE_TIME 1
297 #define LO_STATE_CACHE_TIME 1
298 #define LOCKOWNER_CACHE_TIME 3
299 #define FILE_CACHE_TIME 40
300 #define DELEG_STATE_CACHE_TIME 1
301 #endif
302
/*
 * NFSv4 server state databases
 *
 * Initialized when the module is loaded and used by NFSv4 state tables.
 * These kmem_cache databases are global; the tables that make use of these
 * are per zone.
 */
310 kmem_cache_t *rfs4_client_mem_cache;
311 kmem_cache_t *rfs4_clntIP_mem_cache;
312 kmem_cache_t *rfs4_openown_mem_cache;
313 kmem_cache_t *rfs4_openstID_mem_cache;
314 kmem_cache_t *rfs4_lockstID_mem_cache;
315 kmem_cache_t *rfs4_lockown_mem_cache;
316 kmem_cache_t *rfs4_file_mem_cache;
317 kmem_cache_t *rfs4_delegstID_mem_cache;
318
319 /*
320 * NFSv4 state table functions
321 */
322 static bool_t rfs4_client_create(rfs4_entry_t, void *);
323 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
324 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
325 static void rfs4_client_destroy(rfs4_entry_t);
326 static bool_t rfs4_client_expiry(rfs4_entry_t);
327 static uint32_t clientid_hash(void *);
328 static bool_t clientid_compare(rfs4_entry_t, void *);
329 static void *clientid_mkkey(rfs4_entry_t);
330 static uint32_t nfsclnt_hash(void *);
331 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
332 static void *nfsclnt_mkkey(rfs4_entry_t);
333 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
334 static void rfs4_clntip_destroy(rfs4_entry_t);
335 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
336 static uint32_t clntip_hash(void *);
337 static bool_t clntip_compare(rfs4_entry_t, void *);
338 static void *clntip_mkkey(rfs4_entry_t);
339 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
340 static void rfs4_openowner_destroy(rfs4_entry_t);
341 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
667 cl_ss->ss_pn = rfs4_ss_movestate(
668 statedir, destdir, dep->d_name);
669 } else {
670 cl_ss->ss_pn = ss_pn;
671 }
672 insque(cl_ss, oldstate);
673 } else {
674 rfs4_ss_pnfree(ss_pn);
675 }
676 }
677 }
678
679 out:
680 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
681 VN_RELE(dvp);
682 if (dirt)
683 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
684 }
685
686 static void
687 rfs4_ss_init(nfs4_srv_t *nsrv4)
688 {
689 int npaths = 1;
690 char *default_dss_path = NFS4_DSS_VAR_DIR;
691
692 /* read the default stable storage state */
693 rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
694
695 rfs4_ss_enabled = 1;
696 }
697
698 static void
699 rfs4_ss_fini(nfs4_srv_t *nsrv4)
700 {
701 rfs4_servinst_t *sip;
702
703 mutex_enter(&nsrv4->servinst_lock);
704 sip = nsrv4->nfs4_cur_servinst;
705 while (sip != NULL) {
706 rfs4_dss_clear_oldstate(sip);
707 sip = sip->next;
708 }
709 mutex_exit(&nsrv4->servinst_lock);
710 }
711
712 /*
713 * Remove all oldstate files referenced by this servinst.
714 */
715 static void
716 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
717 {
718 rfs4_oldstate_t *os_head, *osp;
719
720 rw_enter(&sip->oldstate_lock, RW_WRITER);
721 os_head = sip->oldstate;
722
723 if (os_head == NULL) {
724 rw_exit(&sip->oldstate_lock);
725 return;
726 }
727
728 /* skip dummy entry */
729 osp = os_head->next;
733
734 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
735
736 if (osp->cl_id4.id_val)
737 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
738 rfs4_ss_pnfree(osp->ss_pn);
739
740 os_next = osp->next;
741 remque(osp);
742 kmem_free(osp, sizeof (rfs4_oldstate_t));
743 osp = os_next;
744 }
745
746 rw_exit(&sip->oldstate_lock);
747 }
748
749 /*
750 * Form the state and oldstate paths, and read in the stable storage files.
751 */
752 void
753 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
754 {
755 int i;
756 char *state, *oldstate;
757
758 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
759 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
760
761 for (i = 0; i < npaths; i++) {
762 char *path = paths[i];
763
764 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
765 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
766
767 /*
768 * Populate the current server instance's oldstate list.
769 *
770 * 1. Read stable storage data from old state directory,
771 * leaving its contents alone.
772 *
773 * 2. Read stable storage data from state directory,
774 * and move the latter's contents to old state
775 * directory.
776 */
777 /* CSTYLED */
778 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
779 /* CSTYLED */
780 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
781 }
782
783 kmem_free(state, MAXPATHLEN);
784 kmem_free(oldstate, MAXPATHLEN);
785 }
786
787
788 /*
789 * Check if we are still in grace and if the client can be
790 * granted permission to perform reclaims.
791 */
792 void
793 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
794 {
795 rfs4_servinst_t *sip;
796
797 /*
798 * It should be sufficient to check the oldstate data for just
799 * this client's instance. However, since our per-instance
800 * client grouping is solely temporal, HA-NFSv4 RG failover
801 * might result in clients of the same RG being partitioned into
802 * separate instances.
803 *
804 * Until the client grouping is improved, we must check the
805 * oldstate data for all instances with an active grace period.
806 *
807 * This also serves as the mechanism to remove stale oldstate data.
808 * The first time we check an instance after its grace period has
809 * expired, the oldstate data should be cleared.
810 *
811 * Start at the current instance, and walk the list backwards
812 * to the first.
813 */
814 mutex_enter(&nsrv4->servinst_lock);
815 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
816 rfs4_ss_chkclid_sip(cp, sip);
817
818 /* if the above check found this client, we're done */
819 if (cp->rc_can_reclaim)
820 break;
821 }
822 mutex_exit(&nsrv4->servinst_lock);
823 }
824
825 static void
826 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
827 {
828 rfs4_oldstate_t *osp, *os_head;
829
830 /* short circuit everything if this server instance has no oldstate */
831 rw_enter(&sip->oldstate_lock, RW_READER);
832 os_head = sip->oldstate;
833 rw_exit(&sip->oldstate_lock);
834 if (os_head == NULL)
835 return;
836
837 /*
838 * If this server instance is no longer in a grace period then
839 * the client won't be able to reclaim. No further need for this
840 * instance's oldstate data, so it can be cleared.
841 */
842 if (!rfs4_servinst_in_grace(sip))
852 while (osp != os_head) {
853 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
854 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
855 osp->cl_id4.id_len) == 0) {
856 cp->rc_can_reclaim = 1;
857 break;
858 }
859 }
860 osp = osp->next;
861 }
862
863 rw_exit(&sip->oldstate_lock);
864 }
865
866 /*
867 * Place client information into stable storage: 1/3.
868 * First, generate the leaf filename, from the client's IP address and
869 * the server-generated short-hand clientid.
870 */
871 void
872 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
873 {
874 const char *kinet_ntop6(uchar_t *, char *, size_t);
875 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
876 struct sockaddr *ca;
877 uchar_t *b;
878
879 if (rfs4_ss_enabled == 0) {
880 return;
881 }
882
883 buf[0] = 0;
884
885 ca = (struct sockaddr *)&cp->rc_addr;
886
887 /*
888 * Convert the caller's IP address to a dotted string
889 */
890 if (ca->sa_family == AF_INET) {
891 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
892 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
893 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
894 } else if (ca->sa_family == AF_INET6) {
895 struct sockaddr_in6 *sin6;
896
897 sin6 = (struct sockaddr_in6 *)ca;
898 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
899 buf, INET6_ADDRSTRLEN);
900 }
901
902 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
903 (longlong_t)cp->rc_clientid);
904 rfs4_ss_clid_write(nsrv4, cp, leaf);
905 }
906
907 /*
908 * Place client information into stable storage: 2/3.
909 * DSS: distributed stable storage: the file may need to be written to
910 * multiple directories.
911 */
912 static void
913 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
914 {
915 rfs4_servinst_t *sip;
916
917 /*
918 * It should be sufficient to write the leaf file to (all) DSS paths
919 * associated with just this client's instance. However, since our
920 * per-instance client grouping is solely temporal, HA-NFSv4 RG
921 * failover might result in us losing DSS data.
922 *
923 * Until the client grouping is improved, we must write the DSS data
924 * to all instances' paths. Start at the current instance, and
925 * walk the list backwards to the first.
926 */
927 mutex_enter(&nsrv4->servinst_lock);
928 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
929 int i, npaths = sip->dss_npaths;
930
931 /* write the leaf file to all DSS paths */
932 for (i = 0; i < npaths; i++) {
933 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
934
935 /* HA-NFSv4 path might have been failed-away from us */
936 if (dss_path == NULL)
937 continue;
938
939 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
940 }
941 }
942 mutex_exit(&nsrv4->servinst_lock);
943 }
944
945 /*
946 * Place client information into stable storage: 3/3.
947 * Write the stable storage data to the requested file.
948 */
949 static void
950 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
951 {
952 int ioflag;
953 int file_vers = NFS4_SS_VERSION;
954 size_t dirlen;
955 struct uio uio;
956 struct iovec iov[4];
957 char *dir;
958 rfs4_ss_pn_t *ss_pn;
959 vnode_t *vp;
960 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
961
962 /* allow 2 extra bytes for '/' & NUL */
1115 * for forced expiration
1116 */
1117 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1118 cp->rc_forced_expire = 1;
1119 }
1120 break;
1121
1122 default:
1123 /* force this assert to fail */
1124 ASSERT(clr->addr_type != clr->addr_type);
1125 }
1126 }
1127
1128 /*
1129 * This is called from nfssys() in order to clear server state
1130 * for the specified client IP Address.
1131 */
1132 void
1133 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1134 {
1135 nfs4_srv_t *nsrv4;
1136 nsrv4 = nfs4_get_srv();
1137 (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1138 }
1139
1140 /*
1141 * Used to initialize the NFSv4 server's state or database. All of
1142 * the tables are created and timers are set.
1143 */
1144 void
1145 rfs4_state_g_init()
1146 {
1147 extern boolean_t rfs4_cpr_callb(void *, int);
1148 /*
1149 * Add a CPR callback so that we can update client
1150 * access times to extend the lease after a suspend
1151 * and resume (using the same class as rpcmod/connmgr)
1152 */
1153 cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1154
1155 /*
1156 * NFSv4 server state databases
1157 *
1158 * Initialized when the module is loaded and used by NFSv4 state
1159 * tables. These kmem_cache free pools are used globally, the NFSv4
1160 * state tables which make use of these kmem_cache free pools are per
1161 * zone.
1162 *
1163 * initialize the global kmem_cache free pools which will be used by
1164 * the NFSv4 state tables.
1165 */
1166 /* CSTYLED */
1167 rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
1168 /* CSTYLED */
1169 rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
1170 /* CSTYLED */
1171 rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
1172 /* CSTYLED */
1173 rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
1174 /* CSTYLED */
1175 rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
1176 /* CSTYLED */
1177 rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
1178 /* CSTYLED */
1179 rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
1180 /* CSTYLED */
1181 rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1182
1183 rfs4_client_clrst = rfs4_clear_client_state;
1184 }
1185
1186
1187 /*
1188 * Used at server shutdown to cleanup all of the NFSv4 server's structures
1189 * and other state.
1190 */
1191 void
1192 rfs4_state_g_fini()
1193 {
1194 int i;
1195 /*
1196 * Cleanup the CPR callback.
1197 */
1198 if (cpr_id)
1199 (void) callb_delete(cpr_id);
1200
1201 rfs4_client_clrst = NULL;
1202
1203 /* free the NFSv4 state databases */
1204 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1205 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1206 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1207 }
1208
1209 rfs4_client_mem_cache = NULL;
1210 rfs4_clntIP_mem_cache = NULL;
1211 rfs4_openown_mem_cache = NULL;
1212 rfs4_openstID_mem_cache = NULL;
1213 rfs4_lockstID_mem_cache = NULL;
1214 rfs4_lockown_mem_cache = NULL;
1215 rfs4_file_mem_cache = NULL;
1216 rfs4_delegstID_mem_cache = NULL;
1217
1218 /* DSS: distributed stable storage */
1219 nvlist_free(rfs4_dss_oldpaths);
1220 nvlist_free(rfs4_dss_paths);
1221 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1222 }
1223
/*
 * Used to initialize the per zone NFSv4 server's state
 *
 * Steps, in order:
 *   1. Reset the DSS served-paths list and bump rfs4_start_time.
 *   2. Create a server instance (grace period not yet started) over the
 *      default DSS directory, plus rfs4_dss_newpaths[] when running in
 *      the global zone with extra paths configured.
 *   3. Under state_lock: if nfs4_server_state already exists, return
 *      immediately (note that rfs4_ss_init() is then not called here).
 *      Otherwise initialise rfs4_findclient_lock, default any unset
 *      cache times, create the database and all of its tables and
 *      indices.
 *   4. After dropping state_lock, initialise stable storage.
 *
 * Each *_cache_time that is still zero gets its compile-time default and
 * is then multiplied by rfs4_lease_time just before the corresponding
 * table is created.
 */
void
rfs4_state_zone_init(nfs4_srv_t *nsrv4)
{
	time_t start_time;
	int start_grace;
	char *dss_path = NFS4_DSS_VAR_DIR;

	/* DSS: distributed stable storage: initialise served paths list */
	nsrv4->dss_pathlist = NULL;

	/*
	 * Set the boot time.  If the server
	 * has been restarted quickly and has had the opportunity to
	 * service clients, then the start_time needs to be bumped
	 * regardless.  A small window but it exists...
	 */
	start_time = gethrestime_sec();
	if (nsrv4->rfs4_start_time < start_time)
		nsrv4->rfs4_start_time = start_time;
	else
		nsrv4->rfs4_start_time++;

	/*
	 * Create the first server instance, or a new one if the server has
	 * been restarted; see above comments on rfs4_start_time.  Don't
	 * start its grace period; that will be done later, to maximise the
	 * clients' recovery window.
	 */
	start_grace = 0;
	if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
		int i;
		char **dss_allpaths = NULL;
		/* one extra slot, at index 0, for the default path */
		dss_allpaths = kmem_alloc(sizeof (char *) *
		    (rfs4_dss_numnewpaths + 1), KM_SLEEP);
		/*
		 * Add the default path into the list of paths for saving
		 * state information.
		 */
		dss_allpaths[0] = dss_path;
		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
		}
		rfs4_servinst_create(nsrv4, start_grace,
		    (rfs4_dss_numnewpaths + 1), dss_allpaths);
		/* only the pointer array is freed; the strings are not ours */
		kmem_free(dss_allpaths,
		    (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
	} else {
		rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
	}

	/* reset the "first NFSv4 request" status */
	nsrv4->seen_first_compound = 0;

	mutex_enter(&nsrv4->state_lock);

	/*
	 * If the server state database has already been initialized,
	 * skip it
	 */
	if (nsrv4->nfs4_server_state != NULL) {
		mutex_exit(&nsrv4->state_lock);
		return;
	}

	rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);

	/* set the various cache timers for table creation */
	if (nsrv4->rfs4_client_cache_time == 0)
		nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
	if (nsrv4->rfs4_openowner_cache_time == 0)
		nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
	if (nsrv4->rfs4_state_cache_time == 0)
		nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
	if (nsrv4->rfs4_lo_state_cache_time == 0)
		nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
	if (nsrv4->rfs4_lockowner_cache_time == 0)
		nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
	if (nsrv4->rfs4_file_cache_time == 0)
		nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
	if (nsrv4->rfs4_deleg_state_cache_time == 0)
		nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;

	/* Create the overall database to hold all server state */
	nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);

	/*
	 * Now create the individual tables
	 *
	 * NOTE(review): in each rfs4_table_create() call the integer that
	 * follows the cache time equals the number of rfs4_index_create()
	 * calls made for that table below -- presumably the index count;
	 * confirm against rfs4_table_create().
	 */

	/* Client table: indexed by nfs_client_id4 and by clientid */
	nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "Client",
	    nsrv4->rfs4_client_cache_time,
	    2,
	    rfs4_client_create,
	    rfs4_client_destroy,
	    rfs4_client_expiry,
	    sizeof (rfs4_client_t),
	    TABSIZE,
	    MAXTABSZ/8, 100);
	nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
	    "nfs_client_id4", nfsclnt_hash,
	    nfsclnt_compare, nfsclnt_mkkey,
	    TRUE);
	nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
	    "client_id", clientid_hash,
	    clientid_compare, clientid_mkkey,
	    FALSE);

	/* ClntIP table: fixed cache time, not scaled by the lease time */
	nsrv4->rfs4_clntip_cache_time = 86400 * 365; /* about a year */
	nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "ClntIP",
	    nsrv4->rfs4_clntip_cache_time,
	    1,
	    rfs4_clntip_create,
	    rfs4_clntip_destroy,
	    rfs4_clntip_expiry,
	    sizeof (rfs4_clntip_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
	    "client_ip", clntip_hash,
	    clntip_compare, clntip_mkkey,
	    TRUE);

	/* OpenOwner table */
	nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "OpenOwner",
	    nsrv4->rfs4_openowner_cache_time,
	    1,
	    rfs4_openowner_create,
	    rfs4_openowner_destroy,
	    rfs4_openowner_expiry,
	    sizeof (rfs4_openowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
	    "open_owner4", openowner_hash,
	    openowner_compare,
	    openowner_mkkey, TRUE);

	/* OpenStateID table: indexed by owner+file, state id and file */
	nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "OpenStateID",
	    nsrv4->rfs4_state_cache_time,
	    3,
	    rfs4_state_create,
	    rfs4_state_destroy,
	    rfs4_state_expiry,
	    sizeof (rfs4_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	/* CSTYLED */
	nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "Openowner-File",
	    state_owner_file_hash,
	    state_owner_file_compare,
	    state_owner_file_mkkey, TRUE);

	nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "State-id", state_hash,
	    state_compare, state_mkkey, FALSE);

	nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "File", state_file_hash,
	    state_file_compare, state_file_mkkey,
	    FALSE);

	/* LockStateID table: indexed by lock owner x state and by state id */
	nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "LockStateID",
	    nsrv4->rfs4_lo_state_cache_time,
	    2,
	    rfs4_lo_state_create,
	    rfs4_lo_state_destroy,
	    rfs4_lo_state_expiry,
	    sizeof (rfs4_lo_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	/* CSTYLED */
	nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
	    "lockownerxstate",
	    lo_state_lo_hash,
	    lo_state_lo_compare,
	    lo_state_lo_mkkey, TRUE);

	nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
	    "State-id",
	    lo_state_hash, lo_state_compare,
	    lo_state_mkkey, FALSE);

	/* Lockowner table: indexed by lock_owner4 and by pid */
	nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;

	nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "Lockowner",
	    nsrv4->rfs4_lockowner_cache_time,
	    2,
	    rfs4_lockowner_create,
	    rfs4_lockowner_destroy,
	    rfs4_lockowner_expiry,
	    sizeof (rfs4_lockowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
	    "lock_owner4", lockowner_hash,
	    lockowner_compare,
	    lockowner_mkkey, TRUE);

	/* CSTYLED */
	nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
	    "pid", pid_hash,
	    pid_compare, pid_mkkey,
	    FALSE);

	/*
	 * File table: the only table created with a NULL expiry callback
	 * and with -1 as its final argument.
	 */
	nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "File",
	    nsrv4->rfs4_file_cache_time,
	    1,
	    rfs4_file_create,
	    rfs4_file_destroy,
	    NULL,
	    sizeof (rfs4_file_t),
	    TABSIZE,
	    MAXTABSZ, -1);

	nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
	    "Filehandle", file_hash,
	    file_compare, file_mkkey, TRUE);

	/* DelegStateID table: indexed by file+client and by deleg state */
	nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
	/* CSTYLED */
	nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "DelegStateID",
	    nsrv4->rfs4_deleg_state_cache_time,
	    2,
	    rfs4_deleg_state_create,
	    rfs4_deleg_state_destroy,
	    rfs4_deleg_state_expiry,
	    sizeof (rfs4_deleg_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
	    "DelegByFileClient",
	    deleg_hash,
	    deleg_compare,
	    deleg_mkkey, TRUE);

	/* CSTYLED */
	nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
	    "DelegState",
	    deleg_state_hash,
	    deleg_state_compare,
	    deleg_state_mkkey, FALSE);

	mutex_exit(&nsrv4->state_lock);

	/*
	 * Init the stable storage.
	 */
	rfs4_ss_init(nsrv4);
}
1489
1490 /*
1491 * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1492 * and state.
1493 */
1494 void
1495 rfs4_state_zone_fini()
1496 {
1497 rfs4_database_t *dbp;
1498 nfs4_srv_t *nsrv4;
1499 nsrv4 = nfs4_get_srv();
1500
1501 rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1502
1503 /*
1504 * Clean up any dangling stable storage structures BEFORE calling
1505 * rfs4_servinst_destroy_all() so there are no dangling structures
1506 * (i.e. the srvinsts are all cleared of danglers BEFORE they get
1507 * freed).
1508 */
1509 rfs4_ss_fini(nsrv4);
1510
1511 mutex_enter(&nsrv4->state_lock);
1512
1513 if (nsrv4->nfs4_server_state == NULL) {
1514 mutex_exit(&nsrv4->state_lock);
1515 return;
1516 }
1517
1518 /* destroy server instances and current instance ptr */
1519 rfs4_servinst_destroy_all(nsrv4);
1520
1521 /* reset the "first NFSv4 request" status */
1522 nsrv4->seen_first_compound = 0;
1523
1524 dbp = nsrv4->nfs4_server_state;
1525 nsrv4->nfs4_server_state = NULL;
1526
1527 rw_destroy(&nsrv4->rfs4_findclient_lock);
1528
1529 /* First stop all of the reaper threads in the database */
1530 rfs4_database_shutdown(dbp);
1531
1532 /*
1533 * WARNING: There may be consumers of the rfs4 database still
1534 * active as we destroy these. IF that's the case, consider putting
1535 * some of their _zone_fini()-like functions into the zsd key as
1536 * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions. We can
1537 * maintain some ordering guarantees better that way.
1538 */
1539 /* Now destroy/release the database tables */
1540 rfs4_database_destroy(dbp);
1541
1542 /* Reset the cache timers for next time */
1543 nsrv4->rfs4_client_cache_time = 0;
1544 nsrv4->rfs4_openowner_cache_time = 0;
1545 nsrv4->rfs4_state_cache_time = 0;
1546 nsrv4->rfs4_lo_state_cache_time = 0;
1547 nsrv4->rfs4_lockowner_cache_time = 0;
1548 nsrv4->rfs4_file_cache_time = 0;
1549 nsrv4->rfs4_deleg_state_cache_time = 0;
1550
1551 mutex_exit(&nsrv4->state_lock);
1552 }
1553
/*
 * Overlay giving access to a clientid4 as two 32-bit fields.
 * rfs4_client_create() fills impl_id.start_time from the server's
 * rfs4_start_time and impl_id.c_id from the id of the client's database
 * entry; id4 is the same storage viewed as the protocol's clientid4.
 */
typedef union {
	struct {
		uint32_t start_time;	/* server start time */
		uint32_t c_id;		/* database entry id */
	} impl_id;
	clientid4 id4;			/* whole value as a clientid4 */
} cid;
1561
1562 static int foreign_stateid(stateid_t *id);
1563 static int foreign_clientid(cid *cidp);
1564 static void embed_nodeid(cid *cidp);
1565
1566 typedef union {
1567 struct {
1568 uint32_t c_id;
1569 uint32_t gen_num;
1570 } cv_impl;
1571 verifier4 confirm_verf;
1646 * If the sysadmin has used clear_locks for this
1647 * entry then forced_expire will be set and we
1648 * want this entry to be reaped. Or the entry
1649 * has exceeded its lease period.
1650 */
1651 cp_expired = (cp->rc_forced_expire ||
1652 (gethrestime_sec() - cp->rc_last_access
1653 > rfs4_lease_time));
1654
1655 if (!cp->rc_ss_remove && cp_expired)
1656 cp->rc_ss_remove = 1;
1657 return (cp_expired);
1658 }
1659
1660 /*
1661 * Remove the leaf file from all distributed stable storage paths.
1662 */
1663 static void
1664 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1665 {
1666 nfs4_srv_t *nsrv4;
1667 rfs4_servinst_t *sip;
1668 char *leaf = cp->rc_ss_pn->leaf;
1669
1670 /*
1671 * since the state files are written to all DSS
1672 * paths we must remove this leaf file instance
1673 * from all server instances.
1674 */
1675
1676 nsrv4 = nfs4_get_srv();
1677 mutex_enter(&nsrv4->servinst_lock);
1678 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1679 /* remove the leaf file associated with this server instance */
1680 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1681 }
1682 mutex_exit(&nsrv4->servinst_lock);
1683 }
1684
1685 static void
1686 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1687 {
1688 int i, npaths = sip->dss_npaths;
1689
1690 for (i = 0; i < npaths; i++) {
1691 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1692 char *path, *dir;
1693 size_t pathlen;
1694
1695 /* the HA-NFSv4 path might have been failed-over away from us */
1696 if (dss_path == NULL)
1697 continue;
1698
1699 dir = dss_path->path;
1700
1701 /* allow 3 extra bytes for two '/' & a NUL */
1702 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1730 if (cp->rc_ss_remove)
1731 rfs4_dss_remove_cpleaf(cp);
1732 rfs4_ss_pnfree(cp->rc_ss_pn);
1733 }
1734
1735 /* Free the client supplied client id */
1736 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1737
1738 if (cp->rc_sysidt != LM_NOSYSID)
1739 lm_free_sysidt(cp->rc_sysidt);
1740 }
1741
1742 static bool_t
1743 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1744 {
1745 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1746 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1747 struct sockaddr *ca;
1748 cid *cidp;
1749 scid_confirm_verf *scvp;
1750 nfs4_srv_t *nsrv4;
1751
1752 nsrv4 = nfs4_get_srv();
1753
1754 /* Get a clientid to give to the client */
1755 cidp = (cid *)&cp->rc_clientid;
1756 cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1757 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1758
1759 /* If we are booted as a cluster node, embed our nodeid */
1760 if (cluster_bootflags & CLUSTER_BOOTED)
1761 embed_nodeid(cidp);
1762
1763 /* Allocate and copy client's client id value */
1764 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1765 cp->rc_nfs_client.id_len = client->id_len;
1766 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1767 cp->rc_nfs_client.verifier = client->verifier;
1768
1769 /* Copy client's IP address */
1770 ca = client->cl_addr;
1771 if (ca->sa_family == AF_INET)
1772 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1773 else if (ca->sa_family == AF_INET6)
1774 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1775 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1776
1794
1795 cp->rc_cr_set = NULL;
1796
1797 cp->rc_sysidt = LM_NOSYSID;
1798
1799 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1800 offsetof(rfs4_openowner_t, ro_node));
1801
1802 /* set up the callback control structure */
1803 cp->rc_cbinfo.cb_state = CB_UNINIT;
1804 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1805 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1806 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1807
1808 /*
1809 * Associate the client_t with the current server instance.
1810 * The hold is solely to satisfy the calling requirement of
1811 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1812 */
1813 rfs4_dbe_hold(cp->rc_dbe);
1814 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1815 rfs4_dbe_rele(cp->rc_dbe);
1816
1817 return (TRUE);
1818 }
1819
1820 /*
1821 * Caller wants to generate/update the setclientid_confirm verifier
1822 * associated with a client. This is done during the SETCLIENTID
1823 * processing.
1824 */
1825 void
1826 rfs4_client_scv_next(rfs4_client_t *cp)
1827 {
1828 scid_confirm_verf *scvp;
1829
1830 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1831 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1832 scvp->cv_impl.gen_num++;
1833 }
1834
1835 void
1836 rfs4_client_rele(rfs4_client_t *cp)
1837 {
1838 rfs4_dbe_rele(cp->rc_dbe);
1839 }
1840
1841 rfs4_client_t *
1842 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1843 {
1844 rfs4_client_t *cp;
1845 nfs4_srv_t *nsrv4;
1846 nsrv4 = nfs4_get_srv();
1847
1848
1849 if (oldcp) {
1850 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1851 rfs4_dbe_hide(oldcp->rc_dbe);
1852 } else {
1853 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1854 }
1855
1856 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1857 create, (void *)client, RFS4_DBS_VALID);
1858
1859 if (oldcp)
1860 rfs4_dbe_unhide(oldcp->rc_dbe);
1861
1862 rw_exit(&nsrv4->rfs4_findclient_lock);
1863
1864 return (cp);
1865 }
1866
1867 rfs4_client_t *
1868 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1869 {
1870 rfs4_client_t *cp;
1871 bool_t create = FALSE;
1872 cid *cidp = (cid *)&clientid;
1873 nfs4_srv_t *nsrv4 = nfs4_get_srv();
1874
1875 /* If we're a cluster and the nodeid isn't right, short-circuit */
1876 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1877 return (NULL);
1878
1879 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1880
1881 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1882 &create, NULL, RFS4_DBS_VALID);
1883
1884 rw_exit(&nsrv4->rfs4_findclient_lock);
1885
1886 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1887 rfs4_client_rele(cp);
1888 return (NULL);
1889 } else {
1890 return (cp);
1891 }
1892 }
1893
1894 static uint32_t
1895 clntip_hash(void *key)
1896 {
1897 struct sockaddr *addr = key;
1898 int i, len = 0;
1899 uint32_t hash = 0;
1900 char *ptr;
1901
1902 if (addr->sa_family == AF_INET) {
1903 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1904 len = sizeof (struct in_addr);
1972 {
1973 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1974 struct sockaddr *ca = (struct sockaddr *)arg;
1975
1976 /* Copy client's IP address */
1977 if (ca->sa_family == AF_INET)
1978 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1979 else if (ca->sa_family == AF_INET6)
1980 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1981 else
1982 return (FALSE);
1983 cp->ri_no_referrals = 1;
1984
1985 return (TRUE);
1986 }
1987
1988 rfs4_clntip_t *
1989 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1990 {
1991 rfs4_clntip_t *cp;
1992 nfs4_srv_t *nsrv4;
1993
1994 nsrv4 = nfs4_get_srv();
1995
1996 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1997
1998 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1999 create, addr, RFS4_DBS_VALID);
2000
2001 rw_exit(&nsrv4->rfs4_findclient_lock);
2002
2003 return (cp);
2004 }
2005
2006 void
2007 rfs4_invalidate_clntip(struct sockaddr *addr)
2008 {
2009 rfs4_clntip_t *cp;
2010 bool_t create = FALSE;
2011 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2012
2013 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2014
2015 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2016 &create, NULL, RFS4_DBS_VALID);
2017 if (cp == NULL) {
2018 rw_exit(&nsrv4->rfs4_findclient_lock);
2019 return;
2020 }
2021 rfs4_dbe_invalidate(cp->ri_dbe);
2022 rfs4_dbe_rele(cp->ri_dbe);
2023
2024 rw_exit(&nsrv4->rfs4_findclient_lock);
2025 }
2026
2027 bool_t
2028 rfs4_lease_expired(rfs4_client_t *cp)
2029 {
2030 bool_t rc;
2031
2032 rfs4_dbe_lock(cp->rc_dbe);
2033
2034 /*
2035 * If the admin has executed clear_locks for this
2036 * client id, force expire will be set, so no need
2037 * to calculate anything because it's "outa here".
2038 */
2039 if (cp->rc_forced_expire) {
2040 rc = TRUE;
2041 } else {
2042 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2043 }
2044
2152
2153 /* Free the lock owner id */
2154 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2155 }
2156
2157 void
2158 rfs4_openowner_rele(rfs4_openowner_t *oo)
2159 {
2160 rfs4_dbe_rele(oo->ro_dbe);
2161 }
2162
2163 static bool_t
2164 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2165 {
2166 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2167 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2168 open_owner4 *openowner = &argp->ro_owner;
2169 seqid4 seqid = argp->ro_open_seqid;
2170 rfs4_client_t *cp;
2171 bool_t create = FALSE;
2172 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2173
2174 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2175
2176 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2177 &openowner->clientid,
2178 &create, NULL, RFS4_DBS_VALID);
2179
2180 rw_exit(&nsrv4->rfs4_findclient_lock);
2181
2182 if (cp == NULL)
2183 return (FALSE);
2184
2185 oo->ro_reply_fh.nfs_fh4_len = 0;
2186 oo->ro_reply_fh.nfs_fh4_val = NULL;
2187
2188 oo->ro_owner.clientid = openowner->clientid;
2189 oo->ro_owner.owner_val =
2190 kmem_alloc(openowner->owner_len, KM_SLEEP);
2191
2192 bcopy(openowner->owner_val,
2193 oo->ro_owner.owner_val, openowner->owner_len);
2194
2195 oo->ro_owner.owner_len = openowner->owner_len;
2196
2197 oo->ro_need_confirm = TRUE;
2198
2199 rfs4_sw_init(&oo->ro_sw);
2200
2202 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2203 oo->ro_client = cp;
2204 oo->ro_cr_set = NULL;
2205
2206 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2207 offsetof(rfs4_state_t, rs_node));
2208
2209 /* Insert openowner into client's open owner list */
2210 rfs4_dbe_lock(cp->rc_dbe);
2211 list_insert_tail(&cp->rc_openownerlist, oo);
2212 rfs4_dbe_unlock(cp->rc_dbe);
2213
2214 return (TRUE);
2215 }
2216
2217 rfs4_openowner_t *
2218 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2219 {
2220 rfs4_openowner_t *oo;
2221 rfs4_openowner_t arg;
2222 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2223
2224 arg.ro_owner = *openowner;
2225 arg.ro_open_seqid = seqid;
2226 /* CSTYLED */
2227 oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2228 create, &arg, RFS4_DBS_VALID);
2229
2230 return (oo);
2231 }
2232
2233 void
2234 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2235 {
2236
2237 rfs4_dbe_lock(oo->ro_dbe);
2238
2239 oo->ro_open_seqid++;
2240
2241 rfs4_dbe_unlock(oo->ro_dbe);
2242 }
2243
2244 void
2245 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2246 {
2247
2350 }
2351
2352 /* ARGSUSED */
2353 static bool_t
2354 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2355 {
2356 /*
2357 * Since expiry is called with no other references on
2358 * this struct, go ahead and have it removed.
2359 */
2360 return (TRUE);
2361 }
2362
2363 static bool_t
2364 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2365 {
2366 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2367 lock_owner4 *lockowner = (lock_owner4 *)arg;
2368 rfs4_client_t *cp;
2369 bool_t create = FALSE;
2370 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2371
2372 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2373
2374 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2375 &lockowner->clientid,
2376 &create, NULL, RFS4_DBS_VALID);
2377
2378 rw_exit(&nsrv4->rfs4_findclient_lock);
2379
2380 if (cp == NULL)
2381 return (FALSE);
2382
2383 /* Reference client */
2384 lo->rl_client = cp;
2385 lo->rl_owner.clientid = lockowner->clientid;
2386 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2387 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2388 lockowner->owner_len);
2389 lo->rl_owner.owner_len = lockowner->owner_len;
2390 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2391
2392 return (TRUE);
2393 }
2394
2395 rfs4_lockowner_t *
2396 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2397 {
2398 rfs4_lockowner_t *lo;
2399 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2400
2401 /* CSTYLED */
2402 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2403 create, lockowner, RFS4_DBS_VALID);
2404
2405 return (lo);
2406 }
2407
2408 rfs4_lockowner_t *
2409 rfs4_findlockowner_by_pid(pid_t pid)
2410 {
2411 rfs4_lockowner_t *lo;
2412 bool_t create = FALSE;
2413 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2414
2415 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2416 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2417
2418 return (lo);
2419 }
2420
2421
2422 static uint32_t
2423 file_hash(void *key)
2424 {
2425 return (ADDRHASH(key));
2426 }
2427
2428 static void *
2429 file_mkkey(rfs4_entry_t u_entry)
2430 {
2431 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2432
2433 return (fp->rf_vp);
2434 }
2435
2506
2507 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2508 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2509
2510 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2511
2512 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2513
2514 mutex_enter(&vp->v_vsd_lock);
2515 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2516 mutex_exit(&vp->v_vsd_lock);
2517
2518 return (TRUE);
2519 }
2520
2521 rfs4_file_t *
2522 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2523 {
2524 rfs4_file_t *fp;
2525 rfs4_fcreate_arg arg;
2526 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2527
2528 arg.vp = vp;
2529 arg.fh = fh;
2530
2531 if (*create == TRUE)
2532 /* CSTYLED */
2533 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2534 &arg, RFS4_DBS_VALID);
2535 else {
2536 mutex_enter(&vp->v_vsd_lock);
2537 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2538 if (fp) {
2539 rfs4_dbe_lock(fp->rf_dbe);
2540 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2541 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2542 rfs4_dbe_unlock(fp->rf_dbe);
2543 fp = NULL;
2544 } else {
2545 rfs4_dbe_hold(fp->rf_dbe);
2546 rfs4_dbe_unlock(fp->rf_dbe);
2547 }
2548 }
2549 mutex_exit(&vp->v_vsd_lock);
2550 }
2551 return (fp);
2552 }
2553
2554 /*
2555 * Find a file in the db and once it is located, take the rw lock.
2556 * Need to check the vnode pointer and if it does not exist (it was
2557 * removed between the db location and check) redo the find. This
2558 * assumes that a file struct that has a NULL vnode pointer is marked
2559 * at 'invalid' and will not be found in the db the second time
2560 * around.
2561 */
2562 rfs4_file_t *
2563 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2564 {
2565 rfs4_file_t *fp;
2566 rfs4_fcreate_arg arg;
2567 bool_t screate = *create;
2568 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2569
2570 if (screate == FALSE) {
2571 mutex_enter(&vp->v_vsd_lock);
2572 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2573 if (fp) {
2574 rfs4_dbe_lock(fp->rf_dbe);
2575 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2576 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2577 rfs4_dbe_unlock(fp->rf_dbe);
2578 mutex_exit(&vp->v_vsd_lock);
2579 fp = NULL;
2580 } else {
2581 rfs4_dbe_hold(fp->rf_dbe);
2582 rfs4_dbe_unlock(fp->rf_dbe);
2583 mutex_exit(&vp->v_vsd_lock);
2584 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2585 if (fp->rf_vp == NULL) {
2586 rw_exit(&fp->rf_file_rwlock);
2587 rfs4_file_rele(fp);
2588 fp = NULL;
2589 }
2590 }
2591 } else {
2592 mutex_exit(&vp->v_vsd_lock);
2593 }
2594 } else {
2595 retry:
2596 arg.vp = vp;
2597 arg.fh = fh;
2598
2599 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2600 create, &arg, RFS4_DBS_VALID);
2601 if (fp != NULL) {
2602 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2603 if (fp->rf_vp == NULL) {
2604 rw_exit(&fp->rf_file_rwlock);
2605 rfs4_file_rele(fp);
2606 *create = screate;
2607 goto retry;
2608 }
2609 }
2610 }
2611
2612 return (fp);
2613 }
2614
2615 static uint32_t
2616 lo_state_hash(void *key)
2617 {
2618 stateid_t *id = key;
2619
2620 return (id->bits.ident+id->bits.pid);
2735 list_insert_tail(&sp->rs_lostatelist, lsp);
2736 rfs4_dbe_hold(sp->rs_dbe);
2737 rfs4_dbe_unlock(sp->rs_dbe);
2738
2739 return (TRUE);
2740 }
2741
2742 void
2743 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2744 {
2745 if (unlock_fp == TRUE)
2746 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2747 rfs4_dbe_rele(lsp->rls_dbe);
2748 }
2749
2750 static rfs4_lo_state_t *
2751 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2752 {
2753 rfs4_lo_state_t *lsp;
2754 bool_t create = FALSE;
2755 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2756
2757 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2758 &create, NULL, RFS4_DBS_VALID);
2759 if (lock_fp == TRUE && lsp != NULL)
2760 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2761
2762 return (lsp);
2763 }
2764
2765
2766 static uint32_t
2767 lo_state_lo_hash(void *key)
2768 {
2769 rfs4_lo_state_t *lsp = key;
2770
2771 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2772 }
2773
2774 static bool_t
2775 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2776 {
2777 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2778 rfs4_lo_state_t *keyp = key;
2779
2780 return (keyp->rls_locker == lsp->rls_locker &&
2781 keyp->rls_state == lsp->rls_state);
2782 }
2783
2784 static void *
2785 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2786 {
2787 return (u_entry);
2788 }
2789
2790 rfs4_lo_state_t *
2791 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2792 bool_t *create)
2793 {
2794 rfs4_lo_state_t *lsp;
2795 rfs4_lo_state_t arg;
2796 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2797
2798 arg.rls_locker = lo;
2799 arg.rls_state = sp;
2800
2801 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2802 &arg, create, &arg, RFS4_DBS_VALID);
2803
2804 return (lsp);
2805 }
2806
2807 static stateid_t
2808 get_stateid(id_t eid)
2809 {
2810 stateid_t id;
2811 nfs4_srv_t *nsrv4;
2812
2813 nsrv4 = nfs4_get_srv();
2814
2815 id.bits.boottime = nsrv4->rfs4_start_time;
2816 id.bits.ident = eid;
2817 id.bits.chgseq = 0;
2818 id.bits.type = 0;
2819 id.bits.pid = 0;
2820
2821 /*
2822 * If we are booted as a cluster node, embed our nodeid.
2823 * We've already done sanity checks in rfs4_client_create() so no
2824 * need to repeat them here.
2825 */
2826 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2827 clconf_get_nodeid() : 0;
2828
2829 return (id);
2830 }
2831
2832 /*
2833 * For use only when booted as a cluster node.
2834 * Returns TRUE if the embedded nodeid indicates that this stateid was
2835 * generated on another node.
3051 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3052 {
3053 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3054
3055 /* return delegation if necessary */
3056 rfs4_return_deleg(dsp, FALSE);
3057
3058 /* We're done with the file */
3059 rfs4_file_rele(dsp->rds_finfo);
3060 dsp->rds_finfo = NULL;
3061
3062 /* And now with the client */
3063 rfs4_client_rele(dsp->rds_client);
3064 dsp->rds_client = NULL;
3065 }
3066
3067 rfs4_deleg_state_t *
3068 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3069 {
3070 rfs4_deleg_state_t ds, *dsp;
3071 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3072
3073 ds.rds_client = sp->rs_owner->ro_client;
3074 ds.rds_finfo = sp->rs_finfo;
3075
3076 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3077 create, &ds, RFS4_DBS_VALID);
3078
3079 return (dsp);
3080 }
3081
3082 rfs4_deleg_state_t *
3083 rfs4_finddelegstate(stateid_t *id)
3084 {
3085 rfs4_deleg_state_t *dsp;
3086 bool_t create = FALSE;
3087 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3088
3089 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3090 id, &create, NULL, RFS4_DBS_VALID);
3091
3092 return (dsp);
3093 }
3094
3095 void
3096 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3097 {
3098 rfs4_dbe_rele(dsp->rds_dbe);
3099 }
3100
3101 void
3102 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3103 {
3104
3105 rfs4_dbe_lock(lsp->rls_dbe);
3106
3107 /*
3108 * If we are skipping sequence id checking, this means that
3109 * this is the first lock request and therefore the sequence
3110 * id does not need to be updated. This only happens on the
3189 if (sp->rs_closed == TRUE)
3190 return (FALSE);
3191
3192 return (fp == sp->rs_finfo);
3193 }
3194
3195 static void *
3196 state_file_mkkey(rfs4_entry_t u_entry)
3197 {
3198 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3199
3200 return (sp->rs_finfo);
3201 }
3202
3203 rfs4_state_t *
3204 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3205 bool_t *create)
3206 {
3207 rfs4_state_t *sp;
3208 rfs4_state_t key;
3209 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3210
3211 key.rs_owner = oo;
3212 key.rs_finfo = fp;
3213
3214 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3215 &key, create, &key, RFS4_DBS_VALID);
3216
3217 return (sp);
3218 }
3219
3220 /* This returns ANY state struct that refers to this file */
3221 static rfs4_state_t *
3222 rfs4_findstate_by_file(rfs4_file_t *fp)
3223 {
3224 bool_t create = FALSE;
3225 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3226
3227 return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3228 &create, fp, RFS4_DBS_VALID));
3229 }
3230
3231 static bool_t
3232 rfs4_state_expiry(rfs4_entry_t u_entry)
3233 {
3234 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3235
3236 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3237 return (TRUE);
3238
3239 if (sp->rs_closed == TRUE &&
3240 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3241 > rfs4_lease_time))
3242 return (TRUE);
3243
3244 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3245 > rfs4_lease_time));
3246 }
3247
3258 sp->rs_stateid.bits.type = OPENID;
3259 sp->rs_owner = oo;
3260 sp->rs_finfo = fp;
3261
3262 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3263 offsetof(rfs4_lo_state_t, rls_node));
3264
3265 /* Insert state on per open owner's list */
3266 rfs4_dbe_lock(oo->ro_dbe);
3267 list_insert_tail(&oo->ro_statelist, sp);
3268 rfs4_dbe_unlock(oo->ro_dbe);
3269
3270 return (TRUE);
3271 }
3272
3273 static rfs4_state_t *
3274 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3275 {
3276 rfs4_state_t *sp;
3277 bool_t create = FALSE;
3278 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3279
3280 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3281 &create, NULL, find_invalid);
3282 if (lock_fp == TRUE && sp != NULL)
3283 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3284
3285 return (sp);
3286 }
3287
3288 void
3289 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3290 cred_t *cr)
3291 {
3292 /* Remove the associated lo_state owners */
3293 if (!lock_held)
3294 rfs4_dbe_lock(sp->rs_dbe);
3295
3296 /*
3297 * If refcnt == 0, the dbe is about to be destroyed.
3298 * lock state will be released by the reaper thread.
3299 */
3300
3328 }
3329
3330 void
3331 rfs4_client_close(rfs4_client_t *cp)
3332 {
3333 /* Mark client as going away. */
3334 rfs4_dbe_lock(cp->rc_dbe);
3335 rfs4_dbe_invalidate(cp->rc_dbe);
3336 rfs4_dbe_unlock(cp->rc_dbe);
3337
3338 rfs4_client_state_remove(cp);
3339
3340 /* Release the client */
3341 rfs4_client_rele(cp);
3342 }
3343
3344 nfsstat4
3345 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3346 {
3347 cid *cidp = (cid *) cp;
3348 nfs4_srv_t *nsrv4;
3349
3350 nsrv4 = nfs4_get_srv();
3351
3352 /*
3353 * If we are booted as a cluster node, check the embedded nodeid.
3354 * If it indicates that this clientid was generated on another node,
3355 * inform the client accordingly.
3356 */
3357 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3358 return (NFS4ERR_STALE_CLIENTID);
3359
3360 /*
3361 * If the server start time matches the time provided
3362 * by the client (via the clientid) and this is NOT a
3363 * setclientid_confirm then return EXPIRED.
3364 */
3365 if (!setclid_confirm &&
3366 cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3367 return (NFS4ERR_EXPIRED);
3368
3369 return (NFS4ERR_STALE_CLIENTID);
3370 }
3371
3372 /*
3373 * This is used when a stateid has not been found amongst the
3374 * current server's state. Check the stateid to see if it
3375 * was from this server instantiation or not.
3376 */
3377 static nfsstat4
3378 what_stateid_error(stateid_t *id, stateid_type_t type)
3379 {
3380 nfs4_srv_t *nsrv4;
3381
3382 nsrv4 = nfs4_get_srv();
3383
3384 /* If we are booted as a cluster node, was stateid locally generated? */
3385 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3386 return (NFS4ERR_STALE_STATEID);
3387
3388 /* If types don't match then no use checking further */
3389 if (type != id->bits.type)
3390 return (NFS4ERR_BAD_STATEID);
3391
3392 /* From a different server instantiation, return STALE */
3393 if (id->bits.boottime != nsrv4->rfs4_start_time)
3394 return (NFS4ERR_STALE_STATEID);
3395
3396 /*
3397 * From this server but the state is most likely beyond lease
3398 * timeout: return NFS4ERR_EXPIRED. However, there is the
3399 * case of a delegation stateid. For delegations, there is a
3400 * case where the state can be removed without the client's
3401 * knowledge/consent: revocation. In the case of delegation
3402 * revocation, the delegation state will be removed and will
3403 * not be found. If the client does something like a
3404 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3405 * that has been revoked, the server should return BAD_STATEID
3406 * instead of the more common EXPIRED error.
3407 */
3408 if (id->bits.boottime == nsrv4->rfs4_start_time) {
3409 if (type == DELEGID)
3410 return (NFS4ERR_BAD_STATEID);
3411 else
3412 return (NFS4ERR_EXPIRED);
3413 }
3414
3415 return (NFS4ERR_BAD_STATEID);
3416 }
3417
3418 /*
3419 * Used later on to find the various state structs. When called from
3420 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3421 * taken (it is not needed) and helps on the read/write path with
3422 * respect to performance.
3423 */
3424 static nfsstat4
3425 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3426 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3427 {
3428 stateid_t *id = (stateid_t *)stateid;
3890
3891 /*
3892 * This is a special function in that for the file struct provided the
3893 * server wants to remove/close all current state associated with the
3894 * file. The prime use of this would be with OP_REMOVE to force the
3895 * release of state and particularly of file locks.
3896 *
2897 * There is an assumption that there are no delegations outstanding on
3898 * this file at this point. The caller should have waited for those
3899 * to be returned or revoked.
3900 */
3901 void
3902 rfs4_close_all_state(rfs4_file_t *fp)
3903 {
3904 rfs4_state_t *sp;
3905
3906 rfs4_dbe_lock(fp->rf_dbe);
3907
3908 #ifdef DEBUG
3909 /* only applies when server is handing out delegations */
3910 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3911 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3912 #endif
3913
3914 /* No delegations for this file */
3915 ASSERT(list_is_empty(&fp->rf_delegstatelist));
3916
3917 /* Make sure that it can not be found */
3918 rfs4_dbe_invalidate(fp->rf_dbe);
3919
3920 if (fp->rf_vp == NULL) {
3921 rfs4_dbe_unlock(fp->rf_dbe);
3922 return;
3923 }
3924 rfs4_dbe_unlock(fp->rf_dbe);
3925
3926 /*
3927 * Hold as writer to prevent other server threads from
3928 * processing requests related to the file while all state is
3929 * being removed.
3930 */
4096 OPEN_DELEGATE_WRITE) {
4097 (void) fem_uninstall(vp, deleg_wrops,
4098 (void *)fp);
4099 vn_open_downgrade(vp, FREAD|FWRITE);
4100 }
4101 mutex_enter(&vp->v_vsd_lock);
4102 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4103 mutex_exit(&vp->v_vsd_lock);
4104 VN_RELE(vp);
4105 fp->rf_vp = NULL;
4106 }
4107 rfs4_dbe_invalidate(fp->rf_dbe);
4108 }
4109 }
4110
4111 /*
4112 * Given a directory that is being unexported, cleanup/release all
4113 * state in the server that refers to objects residing underneath this
4114 * particular export. The ordering of the release is important.
4115 * Lock_owner, then state and then file.
4116 *
4117 * NFS zones note: nfs_export.c:unexport() calls this from a
4118 * thread in the global zone for NGZ data structures, so we
4119 * CANNOT use zone_getspecific anywhere in this code path.
4120 */
4121 void
4122 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4123 {
4124 nfs_globals_t *ng;
4125 nfs4_srv_t *nsrv4;
4126
4127 ng = ne->ne_globals;
4128 ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
4129 nsrv4 = ng->nfs4_srv;
4130
4131 mutex_enter(&nsrv4->state_lock);
4132
4133 if (nsrv4->nfs4_server_state == NULL) {
4134 mutex_exit(&nsrv4->state_lock);
4135 return;
4136 }
4137
4138 rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
4139 rfs4_lo_state_walk_callout, exi);
4140 rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4141 rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
4142 rfs4_deleg_state_walk_callout, exi);
4143 rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4144
4145 mutex_exit(&nsrv4->state_lock);
4146 }
|