1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2018 Nexenta Systems, Inc.
28 * Copyright 2019 Nexenta by DDN, Inc.
29 */
30
31 #include <sys/systm.h>
32 #include <sys/kmem.h>
33 #include <sys/cmn_err.h>
34 #include <sys/atomic.h>
35 #include <sys/clconf.h>
36 #include <sys/cladm.h>
37 #include <sys/flock.h>
38 #include <nfs/export.h>
39 #include <nfs/nfs.h>
40 #include <nfs/nfs4.h>
41 #include <nfs/nfssys.h>
42 #include <nfs/lm.h>
43 #include <sys/pathname.h>
44 #include <sys/sdt.h>
45 #include <sys/nvpair.h>
46
47 extern u_longlong_t nfs4_srv_caller_id;
48
49 extern uint_t nfs4_srv_vkey;
50
51 stateid4 special0 = {
52 0,
53 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
54 };
55
56 stateid4 special1 = {
57 0xffffffff,
58 {
59 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
60 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
61 (char)0xff, (char)0xff, (char)0xff, (char)0xff
62 }
63 };
64
65
66 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \
67 stateid4_cmp(id, &special1))
68
69 /* For embedding the cluster nodeid into our clientid */
70 #define CLUSTER_NODEID_SHIFT 24
71 #define CLUSTER_MAX_NODEID 255
72
73 #ifdef DEBUG
74 int rfs4_debug;
75 #endif
76
77 static uint32_t rfs4_database_debug = 0x00;
78
79 /* CSTYLED */
80 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
81 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
82 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
83 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
84
85 /*
86 * Couple of simple init/destroy functions for a general waiter
87 */
88 void
89 rfs4_sw_init(rfs4_state_wait_t *swp)
90 {
91 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
92 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
93 swp->sw_active = FALSE;
94 swp->sw_wait_count = 0;
95 }
96
97 void
98 rfs4_sw_destroy(rfs4_state_wait_t *swp)
99 {
100 mutex_destroy(swp->sw_cv_lock);
101 cv_destroy(swp->sw_cv);
102 }
103
104 void
105 rfs4_sw_enter(rfs4_state_wait_t *swp)
106 {
107 mutex_enter(swp->sw_cv_lock);
108 while (swp->sw_active) {
109 swp->sw_wait_count++;
110 cv_wait(swp->sw_cv, swp->sw_cv_lock);
111 swp->sw_wait_count--;
112 }
113 ASSERT(swp->sw_active == FALSE);
114 swp->sw_active = TRUE;
115 mutex_exit(swp->sw_cv_lock);
116 }
117
118 void
119 rfs4_sw_exit(rfs4_state_wait_t *swp)
120 {
121 mutex_enter(swp->sw_cv_lock);
122 ASSERT(swp->sw_active == TRUE);
123 swp->sw_active = FALSE;
124 if (swp->sw_wait_count != 0)
125 cv_broadcast(swp->sw_cv);
126 mutex_exit(swp->sw_cv_lock);
127 }
128
129 static void
130 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
131 {
132 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
133 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
134
135 if (sres->status == NFS4ERR_DENIED) {
136 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
137 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
138 }
139 }
140
141 /*
142 * CPR callback id -- not related to v4 callbacks
143 */
144 static callb_id_t cpr_id = 0;
145
146 static void
147 deep_lock_free(LOCK4res *res)
148 {
149 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
150
151 if (res->status == NFS4ERR_DENIED)
152 kmem_free(lo->owner_val, lo->owner_len);
153 }
154
155 static void
156 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
157 {
158 nfsace4 *sacep, *dacep;
159
160 if (sres->status != NFS4_OK) {
161 return;
162 }
163
164 dres->attrset = sres->attrset;
165
166 switch (sres->delegation.delegation_type) {
167 case OPEN_DELEGATE_NONE:
168 return;
169 case OPEN_DELEGATE_READ:
170 sacep = &sres->delegation.open_delegation4_u.read.permissions;
171 dacep = &dres->delegation.open_delegation4_u.read.permissions;
172 break;
173 case OPEN_DELEGATE_WRITE:
174 sacep = &sres->delegation.open_delegation4_u.write.permissions;
175 dacep = &dres->delegation.open_delegation4_u.write.permissions;
176 break;
177 }
178 dacep->who.utf8string_val =
179 kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
180 bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
181 sacep->who.utf8string_len);
182 }
183
184 static void
185 deep_open_free(OPEN4res *res)
186 {
187 nfsace4 *acep;
188 if (res->status != NFS4_OK)
189 return;
190
191 switch (res->delegation.delegation_type) {
192 case OPEN_DELEGATE_NONE:
193 return;
194 case OPEN_DELEGATE_READ:
195 acep = &res->delegation.open_delegation4_u.read.permissions;
196 break;
197 case OPEN_DELEGATE_WRITE:
198 acep = &res->delegation.open_delegation4_u.write.permissions;
199 break;
200 }
201
202 if (acep->who.utf8string_val) {
203 kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
204 acep->who.utf8string_val = NULL;
205 }
206 }
207
208 void
209 rfs4_free_reply(nfs_resop4 *rp)
210 {
211 switch (rp->resop) {
212 case OP_LOCK:
213 deep_lock_free(&rp->nfs_resop4_u.oplock);
214 break;
215 case OP_OPEN:
216 deep_open_free(&rp->nfs_resop4_u.opopen);
217 default:
218 break;
219 }
220 }
221
222 void
223 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
224 {
225 *dst = *src;
226
227 /* Handle responses that need deep copy */
228 switch (src->resop) {
229 case OP_LOCK:
230 deep_lock_copy(&dst->nfs_resop4_u.oplock,
231 &src->nfs_resop4_u.oplock);
232 break;
233 case OP_OPEN:
234 deep_open_copy(&dst->nfs_resop4_u.opopen,
235 &src->nfs_resop4_u.opopen);
236 break;
237 default:
238 break;
239 };
240 }
241
242 /*
243 * This is the implementation of the underlying state engine. The
244 * public interface to this engine is described by
245 * nfs4_state.h. Callers to the engine should hold no state engine
246 * locks when they call in to it. If the protocol needs to lock data
247 * structures it should do so after acquiring all references to them
248 * first and then follow the following lock order:
249 *
250 * client > openowner > state > lo_state > lockowner > file.
251 *
252 * Internally we only allow a thread to hold one hash bucket lock at a
253 * time and the lock is higher in the lock order (must be acquired
254 * first) than the data structure that is on that hash list.
255 *
256 * If a new reference was acquired by the caller, that reference needs
257 * to be released after releasing all acquired locks with the
258 * corresponding rfs4_*_rele routine.
259 */
260
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
 * correctly (I hope) and will not change by much. The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader/writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
272
/* Table size; a much smaller value is used in DEBUG builds. */
#ifdef DEBUG
#define	TABSIZE 17
#else
#define	TABSIZE 2047
#endif

/* Hash an address-like key by discarding its low three bits. */
#define	ADDRHASH(key) ((unsigned long)(key) >> 3)

/*
 * Parenthesized so the product stays intact when the macro is used
 * inside a larger expression (e.g. "x / MAXTABSZ").
 */
#define	MAXTABSZ (1024 * 1024)

/* The values below are rfs4_lease_time units */

#ifdef DEBUG
#define	CLIENT_CACHE_TIME 1
#define	OPENOWNER_CACHE_TIME 1
#define	STATE_CACHE_TIME 1
#define	LO_STATE_CACHE_TIME 1
#define	LOCKOWNER_CACHE_TIME 1
#define	FILE_CACHE_TIME 3
#define	DELEG_STATE_CACHE_TIME 1
#else
#define	CLIENT_CACHE_TIME 10
#define	OPENOWNER_CACHE_TIME 5
#define	STATE_CACHE_TIME 1
#define	LO_STATE_CACHE_TIME 1
#define	LOCKOWNER_CACHE_TIME 3
#define	FILE_CACHE_TIME 40
#define	DELEG_STATE_CACHE_TIME 1
#endif
302
/*
 * NFSv4 server state databases
 *
 * Initialized when the module is loaded and used by NFSv4 state tables.
 * These kmem_cache databases are global; the tables that make use of
 * them are per zone.
 */
310 kmem_cache_t *rfs4_client_mem_cache;
311 kmem_cache_t *rfs4_clntIP_mem_cache;
312 kmem_cache_t *rfs4_openown_mem_cache;
313 kmem_cache_t *rfs4_openstID_mem_cache;
314 kmem_cache_t *rfs4_lockstID_mem_cache;
315 kmem_cache_t *rfs4_lockown_mem_cache;
316 kmem_cache_t *rfs4_file_mem_cache;
317 kmem_cache_t *rfs4_delegstID_mem_cache;
318
319 /*
320 * NFSv4 state table functions
321 */
322 static bool_t rfs4_client_create(rfs4_entry_t, void *);
323 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
324 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
325 static void rfs4_client_destroy(rfs4_entry_t);
326 static bool_t rfs4_client_expiry(rfs4_entry_t);
327 static uint32_t clientid_hash(void *);
328 static bool_t clientid_compare(rfs4_entry_t, void *);
329 static void *clientid_mkkey(rfs4_entry_t);
330 static uint32_t nfsclnt_hash(void *);
331 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
332 static void *nfsclnt_mkkey(rfs4_entry_t);
333 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
334 static void rfs4_clntip_destroy(rfs4_entry_t);
335 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
336 static uint32_t clntip_hash(void *);
337 static bool_t clntip_compare(rfs4_entry_t, void *);
338 static void *clntip_mkkey(rfs4_entry_t);
339 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
340 static void rfs4_openowner_destroy(rfs4_entry_t);
341 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
342 static uint32_t openowner_hash(void *);
343 static bool_t openowner_compare(rfs4_entry_t, void *);
344 static void *openowner_mkkey(rfs4_entry_t);
345 static bool_t rfs4_state_create(rfs4_entry_t, void *);
346 static void rfs4_state_destroy(rfs4_entry_t);
347 static bool_t rfs4_state_expiry(rfs4_entry_t);
348 static uint32_t state_hash(void *);
349 static bool_t state_compare(rfs4_entry_t, void *);
350 static void *state_mkkey(rfs4_entry_t);
351 static uint32_t state_owner_file_hash(void *);
352 static bool_t state_owner_file_compare(rfs4_entry_t, void *);
353 static void *state_owner_file_mkkey(rfs4_entry_t);
354 static uint32_t state_file_hash(void *);
355 static bool_t state_file_compare(rfs4_entry_t, void *);
356 static void *state_file_mkkey(rfs4_entry_t);
357 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
358 static void rfs4_lo_state_destroy(rfs4_entry_t);
359 static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
360 static uint32_t lo_state_hash(void *);
361 static bool_t lo_state_compare(rfs4_entry_t, void *);
362 static void *lo_state_mkkey(rfs4_entry_t);
363 static uint32_t lo_state_lo_hash(void *);
364 static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
365 static void *lo_state_lo_mkkey(rfs4_entry_t);
366 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
367 static void rfs4_lockowner_destroy(rfs4_entry_t);
368 static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
369 static uint32_t lockowner_hash(void *);
370 static bool_t lockowner_compare(rfs4_entry_t, void *);
371 static void *lockowner_mkkey(rfs4_entry_t);
372 static uint32_t pid_hash(void *);
373 static bool_t pid_compare(rfs4_entry_t, void *);
374 static void *pid_mkkey(rfs4_entry_t);
375 static bool_t rfs4_file_create(rfs4_entry_t, void *);
376 static void rfs4_file_destroy(rfs4_entry_t);
377 static uint32_t file_hash(void *);
378 static bool_t file_compare(rfs4_entry_t, void *);
379 static void *file_mkkey(rfs4_entry_t);
380 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
381 static void rfs4_deleg_state_destroy(rfs4_entry_t);
382 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
383 static uint32_t deleg_hash(void *);
384 static bool_t deleg_compare(rfs4_entry_t, void *);
385 static void *deleg_mkkey(rfs4_entry_t);
386 static uint32_t deleg_state_hash(void *);
387 static bool_t deleg_state_compare(rfs4_entry_t, void *);
388 static void *deleg_state_mkkey(rfs4_entry_t);
389
390 static void rfs4_state_rele_nounlock(rfs4_state_t *);
391
392 static int rfs4_ss_enabled = 0;
393
394 extern void (*rfs4_client_clrst)(struct nfs4clrst_args *);
395
396 void
397 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
398 {
399 kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
400 }
401
402 static rfs4_ss_pn_t *
403 rfs4_ss_pnalloc(char *dir, char *leaf)
404 {
405 rfs4_ss_pn_t *ss_pn;
406 int dir_len, leaf_len;
407
408 /*
409 * validate we have a resonable path
410 * (account for the '/' and trailing null)
411 */
412 if ((dir_len = strlen(dir)) > MAXPATHLEN ||
413 (leaf_len = strlen(leaf)) > MAXNAMELEN ||
414 (dir_len + leaf_len + 2) > MAXPATHLEN) {
415 return (NULL);
416 }
417
418 ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
419
420 (void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
421 /* Handy pointer to just the leaf name */
422 ss_pn->leaf = ss_pn->pn + dir_len + 1;
423 return (ss_pn);
424 }
425
426
427 /*
428 * Move the "leaf" filename from "sdir" directory
429 * to the "ddir" directory. Return the pathname of
430 * the destination unless the rename fails in which
431 * case we need to return the source pathname.
432 */
433 static rfs4_ss_pn_t *
434 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
435 {
436 rfs4_ss_pn_t *src, *dst;
437
438 if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
439 return (NULL);
440
441 if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
442 rfs4_ss_pnfree(src);
443 return (NULL);
444 }
445
446 /*
447 * If the rename fails we shall return the src
448 * pathname and free the dst. Otherwise we need
449 * to free the src and return the dst pathanme.
450 */
451 if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
452 rfs4_ss_pnfree(dst);
453 return (src);
454 }
455 rfs4_ss_pnfree(src);
456 return (dst);
457 }
458
459
460 static rfs4_oldstate_t *
461 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
462 {
463 struct uio uio;
464 struct iovec iov[3];
465
466 rfs4_oldstate_t *cl_ss = NULL;
467 vnode_t *vp;
468 vattr_t va;
469 uint_t id_len;
470 int err, kill_file, file_vers;
471
472 if (ss_pn == NULL)
473 return (NULL);
474
475 /*
476 * open the state file.
477 */
478 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
479 return (NULL);
480 }
481
482 if (vp->v_type != VREG) {
483 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
484 VN_RELE(vp);
485 return (NULL);
486 }
487
488 err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
489 if (err) {
490 /*
491 * We don't have read access? better get the heck out.
492 */
493 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
494 VN_RELE(vp);
495 return (NULL);
496 }
497
498 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
499 /*
500 * get the file size to do some basic validation
501 */
502 va.va_mask = AT_SIZE;
503 err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
504
505 kill_file = (va.va_size == 0 || va.va_size <
506 (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
507
508 if (err || kill_file) {
509 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
510 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
511 VN_RELE(vp);
512 if (kill_file) {
513 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
514 }
515 return (NULL);
516 }
517
518 cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
519
520 /*
521 * build iovecs to read in the file_version, verifier and id_len
522 */
523 iov[0].iov_base = (caddr_t)&file_vers;
524 iov[0].iov_len = sizeof (int);
525 iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
526 iov[1].iov_len = NFS4_VERIFIER_SIZE;
527 iov[2].iov_base = (caddr_t)&id_len;
528 iov[2].iov_len = sizeof (uint_t);
529
530 uio.uio_iov = iov;
531 uio.uio_iovcnt = 3;
532 uio.uio_segflg = UIO_SYSSPACE;
533 uio.uio_loffset = 0;
534 uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
535
536 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
537 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
538 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
539 VN_RELE(vp);
540 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
541 return (NULL);
542 }
543
544 /*
545 * if the file_version doesn't match or if the
546 * id_len is zero or the combination of the verifier,
547 * id_len and id_val is bigger than the file we have
548 * a problem. If so ditch the file.
549 */
550 kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
551 (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
552
553 if (err || kill_file) {
554 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
555 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
556 VN_RELE(vp);
557 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
558 if (kill_file) {
559 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
560 }
561 return (NULL);
562 }
563
564 /*
565 * now get the client id value
566 */
567 cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
568 iov[0].iov_base = cl_ss->cl_id4.id_val;
569 iov[0].iov_len = id_len;
570
571 uio.uio_iov = iov;
572 uio.uio_iovcnt = 1;
573 uio.uio_segflg = UIO_SYSSPACE;
574 uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
575
576 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
577 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
578 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
579 VN_RELE(vp);
580 kmem_free(cl_ss->cl_id4.id_val, id_len);
581 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
582 return (NULL);
583 }
584
585 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
586 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
587 VN_RELE(vp);
588 return (cl_ss);
589 }
590
591 #ifdef nextdp
592 #undef nextdp
593 #endif
594 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
595
596 /*
597 * Add entries from statedir to supplied oldstate list.
598 * Optionally, move all entries from statedir -> destdir.
599 */
600 void
601 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
602 {
603 rfs4_ss_pn_t *ss_pn;
604 rfs4_oldstate_t *cl_ss = NULL;
605 char *dirt = NULL;
606 int err, dir_eof = 0, size = 0;
607 vnode_t *dvp;
608 struct iovec iov;
609 struct uio uio;
610 struct dirent64 *dep;
611 offset_t dirchunk_offset = 0;
612
613 /*
614 * open the state directory
615 */
616 if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
617 return;
618
619 if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
620 goto out;
621
622 dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
623
624 /*
625 * Get and process the directory entries
626 */
627 while (!dir_eof) {
628 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
629 iov.iov_base = dirt;
630 iov.iov_len = RFS4_SS_DIRSIZE;
631 uio.uio_iov = &iov;
632 uio.uio_iovcnt = 1;
633 uio.uio_segflg = UIO_SYSSPACE;
634 uio.uio_loffset = dirchunk_offset;
635 uio.uio_resid = RFS4_SS_DIRSIZE;
636
637 err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
638 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
639 if (err)
640 goto out;
641
642 size = RFS4_SS_DIRSIZE - uio.uio_resid;
643
644 /*
645 * Process all the directory entries in this
646 * readdir chunk
647 */
648 for (dep = (struct dirent64 *)dirt; size > 0;
649 dep = nextdp(dep)) {
650
651 size -= dep->d_reclen;
652 dirchunk_offset = dep->d_off;
653
654 /*
655 * Skip '.' and '..'
656 */
657 if (NFS_IS_DOTNAME(dep->d_name))
658 continue;
659
660 ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
661 if (ss_pn == NULL)
662 continue;
663
664 if (cl_ss = rfs4_ss_getstate(dvp, ss_pn)) {
665 if (destdir != NULL) {
666 rfs4_ss_pnfree(ss_pn);
667 cl_ss->ss_pn = rfs4_ss_movestate(
668 statedir, destdir, dep->d_name);
669 } else {
670 cl_ss->ss_pn = ss_pn;
671 }
672 insque(cl_ss, oldstate);
673 } else {
674 rfs4_ss_pnfree(ss_pn);
675 }
676 }
677 }
678
679 out:
680 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
681 VN_RELE(dvp);
682 if (dirt)
683 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
684 }
685
686 static void
687 rfs4_ss_init(nfs4_srv_t *nsrv4)
688 {
689 int npaths = 1;
690 char *default_dss_path = NFS4_DSS_VAR_DIR;
691
692 /* read the default stable storage state */
693 rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
694
695 rfs4_ss_enabled = 1;
696 }
697
698 static void
699 rfs4_ss_fini(nfs4_srv_t *nsrv4)
700 {
701 rfs4_servinst_t *sip;
702
703 mutex_enter(&nsrv4->servinst_lock);
704 sip = nsrv4->nfs4_cur_servinst;
705 while (sip != NULL) {
706 rfs4_dss_clear_oldstate(sip);
707 sip = sip->next;
708 }
709 mutex_exit(&nsrv4->servinst_lock);
710 }
711
712 /*
713 * Remove all oldstate files referenced by this servinst.
714 */
715 static void
716 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
717 {
718 rfs4_oldstate_t *os_head, *osp;
719
720 rw_enter(&sip->oldstate_lock, RW_WRITER);
721 os_head = sip->oldstate;
722
723 if (os_head == NULL) {
724 rw_exit(&sip->oldstate_lock);
725 return;
726 }
727
728 /* skip dummy entry */
729 osp = os_head->next;
730 while (osp != os_head) {
731 char *leaf = osp->ss_pn->leaf;
732 rfs4_oldstate_t *os_next;
733
734 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
735
736 if (osp->cl_id4.id_val)
737 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
738 rfs4_ss_pnfree(osp->ss_pn);
739
740 os_next = osp->next;
741 remque(osp);
742 kmem_free(osp, sizeof (rfs4_oldstate_t));
743 osp = os_next;
744 }
745
746 rw_exit(&sip->oldstate_lock);
747 }
748
749 /*
750 * Form the state and oldstate paths, and read in the stable storage files.
751 */
752 void
753 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
754 {
755 int i;
756 char *state, *oldstate;
757
758 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
759 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
760
761 for (i = 0; i < npaths; i++) {
762 char *path = paths[i];
763
764 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
765 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
766
767 /*
768 * Populate the current server instance's oldstate list.
769 *
770 * 1. Read stable storage data from old state directory,
771 * leaving its contents alone.
772 *
773 * 2. Read stable storage data from state directory,
774 * and move the latter's contents to old state
775 * directory.
776 */
777 /* CSTYLED */
778 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
779 /* CSTYLED */
780 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
781 }
782
783 kmem_free(state, MAXPATHLEN);
784 kmem_free(oldstate, MAXPATHLEN);
785 }
786
787
788 /*
789 * Check if we are still in grace and if the client can be
790 * granted permission to perform reclaims.
791 */
792 void
793 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
794 {
795 rfs4_servinst_t *sip;
796
797 /*
798 * It should be sufficient to check the oldstate data for just
799 * this client's instance. However, since our per-instance
800 * client grouping is solely temporal, HA-NFSv4 RG failover
801 * might result in clients of the same RG being partitioned into
802 * separate instances.
803 *
804 * Until the client grouping is improved, we must check the
805 * oldstate data for all instances with an active grace period.
806 *
807 * This also serves as the mechanism to remove stale oldstate data.
808 * The first time we check an instance after its grace period has
809 * expired, the oldstate data should be cleared.
810 *
811 * Start at the current instance, and walk the list backwards
812 * to the first.
813 */
814 mutex_enter(&nsrv4->servinst_lock);
815 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
816 rfs4_ss_chkclid_sip(cp, sip);
817
818 /* if the above check found this client, we're done */
819 if (cp->rc_can_reclaim)
820 break;
821 }
822 mutex_exit(&nsrv4->servinst_lock);
823 }
824
825 static void
826 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
827 {
828 rfs4_oldstate_t *osp, *os_head;
829
830 /* short circuit everything if this server instance has no oldstate */
831 rw_enter(&sip->oldstate_lock, RW_READER);
832 os_head = sip->oldstate;
833 rw_exit(&sip->oldstate_lock);
834 if (os_head == NULL)
835 return;
836
837 /*
838 * If this server instance is no longer in a grace period then
839 * the client won't be able to reclaim. No further need for this
840 * instance's oldstate data, so it can be cleared.
841 */
842 if (!rfs4_servinst_in_grace(sip))
843 return;
844
845 /* this instance is still in grace; search for the clientid */
846
847 rw_enter(&sip->oldstate_lock, RW_READER);
848
849 os_head = sip->oldstate;
850 /* skip dummy entry */
851 osp = os_head->next;
852 while (osp != os_head) {
853 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
854 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
855 osp->cl_id4.id_len) == 0) {
856 cp->rc_can_reclaim = 1;
857 break;
858 }
859 }
860 osp = osp->next;
861 }
862
863 rw_exit(&sip->oldstate_lock);
864 }
865
866 /*
867 * Place client information into stable storage: 1/3.
868 * First, generate the leaf filename, from the client's IP address and
869 * the server-generated short-hand clientid.
870 */
871 void
872 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
873 {
874 const char *kinet_ntop6(uchar_t *, char *, size_t);
875 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
876 struct sockaddr *ca;
877 uchar_t *b;
878
879 if (rfs4_ss_enabled == 0) {
880 return;
881 }
882
883 buf[0] = 0;
884
885 ca = (struct sockaddr *)&cp->rc_addr;
886
887 /*
888 * Convert the caller's IP address to a dotted string
889 */
890 if (ca->sa_family == AF_INET) {
891 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
892 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
893 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
894 } else if (ca->sa_family == AF_INET6) {
895 struct sockaddr_in6 *sin6;
896
897 sin6 = (struct sockaddr_in6 *)ca;
898 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
899 buf, INET6_ADDRSTRLEN);
900 }
901
902 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
903 (longlong_t)cp->rc_clientid);
904 rfs4_ss_clid_write(nsrv4, cp, leaf);
905 }
906
907 /*
908 * Place client information into stable storage: 2/3.
909 * DSS: distributed stable storage: the file may need to be written to
910 * multiple directories.
911 */
912 static void
913 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
914 {
915 rfs4_servinst_t *sip;
916
917 /*
918 * It should be sufficient to write the leaf file to (all) DSS paths
919 * associated with just this client's instance. However, since our
920 * per-instance client grouping is solely temporal, HA-NFSv4 RG
921 * failover might result in us losing DSS data.
922 *
923 * Until the client grouping is improved, we must write the DSS data
924 * to all instances' paths. Start at the current instance, and
925 * walk the list backwards to the first.
926 */
927 mutex_enter(&nsrv4->servinst_lock);
928 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
929 int i, npaths = sip->dss_npaths;
930
931 /* write the leaf file to all DSS paths */
932 for (i = 0; i < npaths; i++) {
933 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
934
935 /* HA-NFSv4 path might have been failed-away from us */
936 if (dss_path == NULL)
937 continue;
938
939 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
940 }
941 }
942 mutex_exit(&nsrv4->servinst_lock);
943 }
944
945 /*
946 * Place client information into stable storage: 3/3.
947 * Write the stable storage data to the requested file.
948 */
949 static void
950 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
951 {
952 int ioflag;
953 int file_vers = NFS4_SS_VERSION;
954 size_t dirlen;
955 struct uio uio;
956 struct iovec iov[4];
957 char *dir;
958 rfs4_ss_pn_t *ss_pn;
959 vnode_t *vp;
960 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
961
962 /* allow 2 extra bytes for '/' & NUL */
963 dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
964 dir = kmem_alloc(dirlen, KM_SLEEP);
965 (void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
966
967 ss_pn = rfs4_ss_pnalloc(dir, leaf);
968 /* rfs4_ss_pnalloc takes its own copy */
969 kmem_free(dir, dirlen);
970 if (ss_pn == NULL)
971 return;
972
973 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
974 CRCREAT, 0)) {
975 rfs4_ss_pnfree(ss_pn);
976 return;
977 }
978
979 /*
980 * We need to record leaf - i.e. the filename - so that we know
981 * what to remove, in the future. However, the dir part of cp->ss_pn
982 * should never be referenced directly, since it's potentially only
983 * one of several paths with this leaf in it.
984 */
985 if (cp->rc_ss_pn != NULL) {
986 if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
987 /* we've already recorded *this* leaf */
988 rfs4_ss_pnfree(ss_pn);
989 } else {
990 /* replace with this leaf */
991 rfs4_ss_pnfree(cp->rc_ss_pn);
992 cp->rc_ss_pn = ss_pn;
993 }
994 } else {
995 cp->rc_ss_pn = ss_pn;
996 }
997
998 /*
999 * Build a scatter list that points to the nfs_client_id4
1000 */
1001 iov[0].iov_base = (caddr_t)&file_vers;
1002 iov[0].iov_len = sizeof (int);
1003 iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1004 iov[1].iov_len = NFS4_VERIFIER_SIZE;
1005 iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1006 iov[2].iov_len = sizeof (uint_t);
1007 iov[3].iov_base = (caddr_t)cl_id4->id_val;
1008 iov[3].iov_len = cl_id4->id_len;
1009
1010 uio.uio_iov = iov;
1011 uio.uio_iovcnt = 4;
1012 uio.uio_loffset = 0;
1013 uio.uio_segflg = UIO_SYSSPACE;
1014 uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1015 uio.uio_resid = cl_id4->id_len + sizeof (int) +
1016 NFS4_VERIFIER_SIZE + sizeof (uint_t);
1017
1018 ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1019 uio.uio_extflg = UIO_COPY_DEFAULT;
1020
1021 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1022 /* write the full client id to the file. */
1023 (void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1024 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1025
1026 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1027 VN_RELE(vp);
1028 }
1029
1030 /*
1031 * DSS: distributed stable storage.
1032 * Unpack the list of paths passed by nfsd.
1033 * Use nvlist_alloc(9F) to manage the data.
1034 * The caller is responsible for allocating and freeing the buffer.
1035 */
1036 int
1037 rfs4_dss_setpaths(char *buf, size_t buflen)
1038 {
1039 int error;
1040
1041 /*
1042 * If this is a "warm start", i.e. we previously had DSS paths,
1043 * preserve the old paths.
1044 */
1045 if (rfs4_dss_paths != NULL) {
1046 /*
1047 * Before we lose the ptr, destroy the nvlist and pathnames
1048 * array from the warm start before this one.
1049 */
1050 nvlist_free(rfs4_dss_oldpaths);
1051 rfs4_dss_oldpaths = rfs4_dss_paths;
1052 }
1053
1054 /* unpack the buffer into a searchable nvlist */
1055 error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1056 if (error)
1057 return (error);
1058
1059 /*
1060 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1061 * in the list, and record its location.
1062 */
1063 error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1064 &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1065 return (error);
1066 }
1067
1068 /*
1069 * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1070 * to find and mark the client for forced expire.
1071 */
1072 static void
1073 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1074 {
1075 rfs4_client_t *cp = (rfs4_client_t *)ent;
1076 struct nfs4clrst_args *clr = arg;
1077 struct sockaddr_in6 *ent_sin6;
1078 struct in6_addr clr_in6;
1079 struct sockaddr_in *ent_sin;
1080 struct in_addr clr_in;
1081
1082 if (clr->addr_type != cp->rc_addr.ss_family) {
1083 return;
1084 }
1085
1086 switch (clr->addr_type) {
1087
1088 case AF_INET6:
1089 /* copyin the address from user space */
1090 if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1091 break;
1092 }
1093
1094 ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1095
1096 /*
1097 * now compare, and if equivalent mark entry
1098 * for forced expiration
1099 */
1100 if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1101 cp->rc_forced_expire = 1;
1102 }
1103 break;
1104
1105 case AF_INET:
1106 /* copyin the address from user space */
1107 if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1108 break;
1109 }
1110
1111 ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1112
1113 /*
1114 * now compare, and if equivalent mark entry
1115 * for forced expiration
1116 */
1117 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1118 cp->rc_forced_expire = 1;
1119 }
1120 break;
1121
1122 default:
1123 /* force this assert to fail */
1124 ASSERT(clr->addr_type != clr->addr_type);
1125 }
1126 }
1127
1128 /*
1129 * This is called from nfssys() in order to clear server state
1130 * for the specified client IP Address.
1131 */
1132 void
1133 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1134 {
1135 nfs4_srv_t *nsrv4;
1136 nsrv4 = nfs4_get_srv();
1137 (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1138 }
1139
1140 /*
1141 * Used to initialize the NFSv4 server's state or database. All of
1142 * the tables are created and timers are set.
1143 */
1144 void
1145 rfs4_state_g_init()
1146 {
1147 extern boolean_t rfs4_cpr_callb(void *, int);
1148 /*
1149 * Add a CPR callback so that we can update client
1150 * access times to extend the lease after a suspend
1151 * and resume (using the same class as rpcmod/connmgr)
1152 */
1153 cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1154
1155 /*
1156 * NFSv4 server state databases
1157 *
1158 * Initialized when the module is loaded and used by NFSv4 state
1159 * tables. These kmem_cache free pools are used globally, the NFSv4
1160 * state tables which make use of these kmem_cache free pools are per
1161 * zone.
1162 *
1163 * initialize the global kmem_cache free pools which will be used by
1164 * the NFSv4 state tables.
1165 */
1166 /* CSTYLED */
1167 rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
1168 /* CSTYLED */
1169 rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
1170 /* CSTYLED */
1171 rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
1172 /* CSTYLED */
1173 rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
1174 /* CSTYLED */
1175 rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
1176 /* CSTYLED */
1177 rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
1178 /* CSTYLED */
1179 rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
1180 /* CSTYLED */
1181 rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1182
1183 rfs4_client_clrst = rfs4_clear_client_state;
1184 }
1185
1186
1187 /*
1188 * Used at server shutdown to cleanup all of the NFSv4 server's structures
1189 * and other state.
1190 */
1191 void
1192 rfs4_state_g_fini()
1193 {
1194 int i;
1195 /*
1196 * Cleanup the CPR callback.
1197 */
1198 if (cpr_id)
1199 (void) callb_delete(cpr_id);
1200
1201 rfs4_client_clrst = NULL;
1202
1203 /* free the NFSv4 state databases */
1204 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1205 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1206 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1207 }
1208
1209 rfs4_client_mem_cache = NULL;
1210 rfs4_clntIP_mem_cache = NULL;
1211 rfs4_openown_mem_cache = NULL;
1212 rfs4_openstID_mem_cache = NULL;
1213 rfs4_lockstID_mem_cache = NULL;
1214 rfs4_lockown_mem_cache = NULL;
1215 rfs4_file_mem_cache = NULL;
1216 rfs4_delegstID_mem_cache = NULL;
1217
1218 /* DSS: distributed stable storage */
1219 nvlist_free(rfs4_dss_oldpaths);
1220 nvlist_free(rfs4_dss_paths);
1221 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1222 }
1223
1224 /*
1225 * Used to initialize the per zone NFSv4 server's state
1226 */
1227 void
1228 rfs4_state_zone_init(nfs4_srv_t *nsrv4)
1229 {
1230 time_t start_time;
1231 int start_grace;
1232 char *dss_path = NFS4_DSS_VAR_DIR;
1233
1234 /* DSS: distributed stable storage: initialise served paths list */
1235 nsrv4->dss_pathlist = NULL;
1236
1237 /*
1238 * Set the boot time. If the server
1239 * has been restarted quickly and has had the opportunity to
1240 * service clients, then the start_time needs to be bumped
1241 * regardless. A small window but it exists...
1242 */
1243 start_time = gethrestime_sec();
1244 if (nsrv4->rfs4_start_time < start_time)
1245 nsrv4->rfs4_start_time = start_time;
1246 else
1247 nsrv4->rfs4_start_time++;
1248
1249 /*
1250 * Create the first server instance, or a new one if the server has
1251 * been restarted; see above comments on rfs4_start_time. Don't
1252 * start its grace period; that will be done later, to maximise the
1253 * clients' recovery window.
1254 */
1255 start_grace = 0;
1256 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
1257 int i;
1258 char **dss_allpaths = NULL;
1259 dss_allpaths = kmem_alloc(sizeof (char *) * (rfs4_dss_numnewpaths + 1), KM_SLEEP);
1260 /*
1261 * Add the default path into the list of paths for saving
1262 * state informantion.
1263 */
1264 dss_allpaths[0] = dss_path;
1265 for ( i = 0; i < rfs4_dss_numnewpaths; i++) {
1266 dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
1267 }
1268 rfs4_servinst_create(nsrv4, start_grace, (rfs4_dss_numnewpaths + 1), dss_allpaths);
1269 kmem_free(dss_allpaths, (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
1270 } else {
1271 rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
1272 }
1273
1274 /* reset the "first NFSv4 request" status */
1275 nsrv4->seen_first_compound = 0;
1276
1277 mutex_enter(&nsrv4->state_lock);
1278
1279 /*
1280 * If the server state database has already been initialized,
1281 * skip it
1282 */
1283 if (nsrv4->nfs4_server_state != NULL) {
1284 mutex_exit(&nsrv4->state_lock);
1285 return;
1286 }
1287
1288 rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
1289
1290 /* set the various cache timers for table creation */
1291 if (nsrv4->rfs4_client_cache_time == 0)
1292 nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
1293 if (nsrv4->rfs4_openowner_cache_time == 0)
1294 nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1295 if (nsrv4->rfs4_state_cache_time == 0)
1296 nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
1297 if (nsrv4->rfs4_lo_state_cache_time == 0)
1298 nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1299 if (nsrv4->rfs4_lockowner_cache_time == 0)
1300 nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1301 if (nsrv4->rfs4_file_cache_time == 0)
1302 nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
1303 if (nsrv4->rfs4_deleg_state_cache_time == 0)
1304 nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1305
1306 /* Create the overall database to hold all server state */
1307 nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1308
1309 /* Now create the individual tables */
1310 nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
1311 nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1312 "Client",
1313 nsrv4->rfs4_client_cache_time,
1314 2,
1315 rfs4_client_create,
1316 rfs4_client_destroy,
1317 rfs4_client_expiry,
1318 sizeof (rfs4_client_t),
1319 TABSIZE,
1320 MAXTABSZ/8, 100);
1321 nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1322 "nfs_client_id4", nfsclnt_hash,
1323 nfsclnt_compare, nfsclnt_mkkey,
1324 TRUE);
1325 nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1326 "client_id", clientid_hash,
1327 clientid_compare, clientid_mkkey,
1328 FALSE);
1329
1330 nsrv4->rfs4_clntip_cache_time = 86400 * 365; /* about a year */
1331 nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1332 "ClntIP",
1333 nsrv4->rfs4_clntip_cache_time,
1334 1,
1335 rfs4_clntip_create,
1336 rfs4_clntip_destroy,
1337 rfs4_clntip_expiry,
1338 sizeof (rfs4_clntip_t),
1339 TABSIZE,
1340 MAXTABSZ, 100);
1341 nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1342 "client_ip", clntip_hash,
1343 clntip_compare, clntip_mkkey,
1344 TRUE);
1345
1346 nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
1347 nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1348 "OpenOwner",
1349 nsrv4->rfs4_openowner_cache_time,
1350 1,
1351 rfs4_openowner_create,
1352 rfs4_openowner_destroy,
1353 rfs4_openowner_expiry,
1354 sizeof (rfs4_openowner_t),
1355 TABSIZE,
1356 MAXTABSZ, 100);
1357 nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1358 "open_owner4", openowner_hash,
1359 openowner_compare,
1360 openowner_mkkey, TRUE);
1361
1362 nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
1363 nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1364 "OpenStateID",
1365 nsrv4->rfs4_state_cache_time,
1366 3,
1367 rfs4_state_create,
1368 rfs4_state_destroy,
1369 rfs4_state_expiry,
1370 sizeof (rfs4_state_t),
1371 TABSIZE,
1372 MAXTABSZ, 100);
1373
1374 /* CSTYLED */
1375 nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1376 "Openowner-File",
1377 state_owner_file_hash,
1378 state_owner_file_compare,
1379 state_owner_file_mkkey, TRUE);
1380
1381 nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1382 "State-id", state_hash,
1383 state_compare, state_mkkey, FALSE);
1384
1385 nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1386 "File", state_file_hash,
1387 state_file_compare, state_file_mkkey,
1388 FALSE);
1389
1390 nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
1391 nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1392 "LockStateID",
1393 nsrv4->rfs4_lo_state_cache_time,
1394 2,
1395 rfs4_lo_state_create,
1396 rfs4_lo_state_destroy,
1397 rfs4_lo_state_expiry,
1398 sizeof (rfs4_lo_state_t),
1399 TABSIZE,
1400 MAXTABSZ, 100);
1401
1402 /* CSTYLED */
1403 nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1404 "lockownerxstate",
1405 lo_state_lo_hash,
1406 lo_state_lo_compare,
1407 lo_state_lo_mkkey, TRUE);
1408
1409 nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1410 "State-id",
1411 lo_state_hash, lo_state_compare,
1412 lo_state_mkkey, FALSE);
1413
1414 nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1415
1416 nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1417 "Lockowner",
1418 nsrv4->rfs4_lockowner_cache_time,
1419 2,
1420 rfs4_lockowner_create,
1421 rfs4_lockowner_destroy,
1422 rfs4_lockowner_expiry,
1423 sizeof (rfs4_lockowner_t),
1424 TABSIZE,
1425 MAXTABSZ, 100);
1426
1427 nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1428 "lock_owner4", lockowner_hash,
1429 lockowner_compare,
1430 lockowner_mkkey, TRUE);
1431
1432 /* CSTYLED */
1433 nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1434 "pid", pid_hash,
1435 pid_compare, pid_mkkey,
1436 FALSE);
1437
1438 nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
1439 nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1440 "File",
1441 nsrv4->rfs4_file_cache_time,
1442 1,
1443 rfs4_file_create,
1444 rfs4_file_destroy,
1445 NULL,
1446 sizeof (rfs4_file_t),
1447 TABSIZE,
1448 MAXTABSZ, -1);
1449
1450 nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1451 "Filehandle", file_hash,
1452 file_compare, file_mkkey, TRUE);
1453
1454 nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
1455 /* CSTYLED */
1456 nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1457 "DelegStateID",
1458 nsrv4->rfs4_deleg_state_cache_time,
1459 2,
1460 rfs4_deleg_state_create,
1461 rfs4_deleg_state_destroy,
1462 rfs4_deleg_state_expiry,
1463 sizeof (rfs4_deleg_state_t),
1464 TABSIZE,
1465 MAXTABSZ, 100);
1466 nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1467 "DelegByFileClient",
1468 deleg_hash,
1469 deleg_compare,
1470 deleg_mkkey, TRUE);
1471
1472 /* CSTYLED */
1473 nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1474 "DelegState",
1475 deleg_state_hash,
1476 deleg_state_compare,
1477 deleg_state_mkkey, FALSE);
1478
1479 mutex_exit(&nsrv4->state_lock);
1480
1481 /*
1482 * Init the stable storage.
1483 */
1484 rfs4_ss_init(nsrv4);
1485 }
1486
1487 /*
1488 * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1489 * and state.
1490 */
1491 void
1492 rfs4_state_zone_fini()
1493 {
1494 rfs4_database_t *dbp;
1495 nfs4_srv_t *nsrv4;
1496 nsrv4 = nfs4_get_srv();
1497
1498 rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1499
1500 /*
1501 * Clean up any dangling stable storage structures BEFORE calling
1502 * rfs4_servinst_destroy_all() so there are no dangling structures
1503 * (i.e. the srvinsts are all cleared of danglers BEFORE they get
1504 * freed).
1505 */
1506 rfs4_ss_fini(nsrv4);
1507
1508 mutex_enter(&nsrv4->state_lock);
1509
1510 if (nsrv4->nfs4_server_state == NULL) {
1511 mutex_exit(&nsrv4->state_lock);
1512 return;
1513 }
1514
1515 /* destroy server instances and current instance ptr */
1516 rfs4_servinst_destroy_all(nsrv4);
1517
1518 /* reset the "first NFSv4 request" status */
1519 nsrv4->seen_first_compound = 0;
1520
1521 dbp = nsrv4->nfs4_server_state;
1522 nsrv4->nfs4_server_state = NULL;
1523
1524 rw_destroy(&nsrv4->rfs4_findclient_lock);
1525
1526 /* First stop all of the reaper threads in the database */
1527 rfs4_database_shutdown(dbp);
1528 /*
1529 * WARNING: There may be consumers of the rfs4 database still
1530 * active as we destroy these. IF that's the case, consider putting
1531 * some of their _zone_fini()-like functions into the zsd key as
1532 * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions. We can
1533 * maintain some ordering guarantees better that way.
1534 */
1535 /* Now destroy/release the database tables */
1536 rfs4_database_destroy(dbp);
1537
1538 /* Reset the cache timers for next time */
1539 nsrv4->rfs4_client_cache_time = 0;
1540 nsrv4->rfs4_openowner_cache_time = 0;
1541 nsrv4->rfs4_state_cache_time = 0;
1542 nsrv4->rfs4_lo_state_cache_time = 0;
1543 nsrv4->rfs4_lockowner_cache_time = 0;
1544 nsrv4->rfs4_file_cache_time = 0;
1545 nsrv4->rfs4_deleg_state_cache_time = 0;
1546
1547 mutex_exit(&nsrv4->state_lock);
1548 }
1549
/*
 * Overlay used to view a clientid4 as two 32-bit fields.
 * rfs4_client_create() fills start_time from the server's rfs4_start_time
 * and c_id from the database entry id; clientid_hash() hashes on c_id.
 */
typedef union {
	struct {
		uint32_t start_time;
		uint32_t c_id;
	} impl_id;
	clientid4 id4;
} cid;
1557
1558 static int foreign_stateid(stateid_t *id);
1559 static int foreign_clientid(cid *cidp);
1560 static void embed_nodeid(cid *cidp);
1561
/*
 * Overlay used to view the SETCLIENTID_CONFIRM verifier (verifier4) as two
 * 32-bit fields.  rfs4_client_create() sets c_id to the client's c_id and
 * gen_num to 0; rfs4_client_scv_next() increments gen_num.
 */
typedef union {
	struct {
		uint32_t c_id;
		uint32_t gen_num;
	} cv_impl;
	verifier4 confirm_verf;
} scid_confirm_verf;
1569
1570 static uint32_t
1571 clientid_hash(void *key)
1572 {
1573 cid *idp = key;
1574
1575 return (idp->impl_id.c_id);
1576 }
1577
1578 static bool_t
1579 clientid_compare(rfs4_entry_t entry, void *key)
1580 {
1581 rfs4_client_t *cp = (rfs4_client_t *)entry;
1582 clientid4 *idp = key;
1583
1584 return (*idp == cp->rc_clientid);
1585 }
1586
1587 static void *
1588 clientid_mkkey(rfs4_entry_t entry)
1589 {
1590 rfs4_client_t *cp = (rfs4_client_t *)entry;
1591
1592 return (&cp->rc_clientid);
1593 }
1594
1595 static uint32_t
1596 nfsclnt_hash(void *key)
1597 {
1598 nfs_client_id4 *client = key;
1599 int i;
1600 uint32_t hash = 0;
1601
1602 for (i = 0; i < client->id_len; i++) {
1603 hash <<= 1;
1604 hash += (uint_t)client->id_val[i];
1605 }
1606 return (hash);
1607 }
1608
1609
1610 static bool_t
1611 nfsclnt_compare(rfs4_entry_t entry, void *key)
1612 {
1613 rfs4_client_t *cp = (rfs4_client_t *)entry;
1614 nfs_client_id4 *nfs_client = key;
1615
1616 if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1617 return (FALSE);
1618
1619 return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1620 nfs_client->id_len) == 0);
1621 }
1622
1623 static void *
1624 nfsclnt_mkkey(rfs4_entry_t entry)
1625 {
1626 rfs4_client_t *cp = (rfs4_client_t *)entry;
1627
1628 return (&cp->rc_nfs_client);
1629 }
1630
1631 static bool_t
1632 rfs4_client_expiry(rfs4_entry_t u_entry)
1633 {
1634 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1635 bool_t cp_expired;
1636
1637 if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1638 cp->rc_ss_remove = 1;
1639 return (TRUE);
1640 }
1641 /*
1642 * If the sysadmin has used clear_locks for this
1643 * entry then forced_expire will be set and we
1644 * want this entry to be reaped. Or the entry
1645 * has exceeded its lease period.
1646 */
1647 cp_expired = (cp->rc_forced_expire ||
1648 (gethrestime_sec() - cp->rc_last_access
1649 > rfs4_lease_time));
1650
1651 if (!cp->rc_ss_remove && cp_expired)
1652 cp->rc_ss_remove = 1;
1653 return (cp_expired);
1654 }
1655
1656 /*
1657 * Remove the leaf file from all distributed stable storage paths.
1658 */
1659 static void
1660 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1661 {
1662 nfs4_srv_t *nsrv4;
1663 rfs4_servinst_t *sip;
1664 char *leaf = cp->rc_ss_pn->leaf;
1665
1666 /*
1667 * since the state files are written to all DSS
1668 * paths we must remove this leaf file instance
1669 * from all server instances.
1670 */
1671
1672 nsrv4 = nfs4_get_srv();
1673 mutex_enter(&nsrv4->servinst_lock);
1674 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1675 /* remove the leaf file associated with this server instance */
1676 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1677 }
1678 mutex_exit(&nsrv4->servinst_lock);
1679 }
1680
1681 static void
1682 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1683 {
1684 int i, npaths = sip->dss_npaths;
1685
1686 for (i = 0; i < npaths; i++) {
1687 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1688 char *path, *dir;
1689 size_t pathlen;
1690
1691 /* the HA-NFSv4 path might have been failed-over away from us */
1692 if (dss_path == NULL)
1693 continue;
1694
1695 dir = dss_path->path;
1696
1697 /* allow 3 extra bytes for two '/' & a NUL */
1698 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1699 path = kmem_alloc(pathlen, KM_SLEEP);
1700 (void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1701
1702 (void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1703
1704 kmem_free(path, pathlen);
1705 }
1706 }
1707
1708 static void
1709 rfs4_client_destroy(rfs4_entry_t u_entry)
1710 {
1711 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1712
1713 mutex_destroy(cp->rc_cbinfo.cb_lock);
1714 cv_destroy(cp->rc_cbinfo.cb_cv);
1715 cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1716 list_destroy(&cp->rc_openownerlist);
1717
1718 /* free callback info */
1719 rfs4_cbinfo_free(&cp->rc_cbinfo);
1720
1721 if (cp->rc_cp_confirmed)
1722 rfs4_client_rele(cp->rc_cp_confirmed);
1723
1724 if (cp->rc_ss_pn) {
1725 /* check if the stable storage files need to be removed */
1726 if (cp->rc_ss_remove)
1727 rfs4_dss_remove_cpleaf(cp);
1728 rfs4_ss_pnfree(cp->rc_ss_pn);
1729 }
1730
1731 /* Free the client supplied client id */
1732 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1733
1734 if (cp->rc_sysidt != LM_NOSYSID)
1735 lm_free_sysidt(cp->rc_sysidt);
1736 }
1737
1738 static bool_t
1739 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1740 {
1741 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1742 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1743 struct sockaddr *ca;
1744 cid *cidp;
1745 scid_confirm_verf *scvp;
1746 nfs4_srv_t *nsrv4;
1747
1748 nsrv4 = nfs4_get_srv();
1749
1750 /* Get a clientid to give to the client */
1751 cidp = (cid *)&cp->rc_clientid;
1752 cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1753 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1754
1755 /* If we are booted as a cluster node, embed our nodeid */
1756 if (cluster_bootflags & CLUSTER_BOOTED)
1757 embed_nodeid(cidp);
1758
1759 /* Allocate and copy client's client id value */
1760 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1761 cp->rc_nfs_client.id_len = client->id_len;
1762 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1763 cp->rc_nfs_client.verifier = client->verifier;
1764
1765 /* Copy client's IP address */
1766 ca = client->cl_addr;
1767 if (ca->sa_family == AF_INET)
1768 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1769 else if (ca->sa_family == AF_INET6)
1770 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1771 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1772
1773 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1774 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1775 scvp->cv_impl.c_id = cidp->impl_id.c_id;
1776 scvp->cv_impl.gen_num = 0;
1777
1778 /* An F_UNLKSYS has been done for this client */
1779 cp->rc_unlksys_completed = FALSE;
1780
1781 /* We need the client to ack us */
1782 cp->rc_need_confirm = TRUE;
1783 cp->rc_cp_confirmed = NULL;
1784
1785 /* TRUE all the time until the callback path actually fails */
1786 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1787
1788 /* Initialize the access time to now */
1789 cp->rc_last_access = gethrestime_sec();
1790
1791 cp->rc_cr_set = NULL;
1792
1793 cp->rc_sysidt = LM_NOSYSID;
1794
1795 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1796 offsetof(rfs4_openowner_t, ro_node));
1797
1798 /* set up the callback control structure */
1799 cp->rc_cbinfo.cb_state = CB_UNINIT;
1800 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1801 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1802 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1803
1804 /*
1805 * Associate the client_t with the current server instance.
1806 * The hold is solely to satisfy the calling requirement of
1807 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1808 */
1809 rfs4_dbe_hold(cp->rc_dbe);
1810 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1811 rfs4_dbe_rele(cp->rc_dbe);
1812
1813 return (TRUE);
1814 }
1815
1816 /*
1817 * Caller wants to generate/update the setclientid_confirm verifier
1818 * associated with a client. This is done during the SETCLIENTID
1819 * processing.
1820 */
1821 void
1822 rfs4_client_scv_next(rfs4_client_t *cp)
1823 {
1824 scid_confirm_verf *scvp;
1825
1826 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1827 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1828 scvp->cv_impl.gen_num++;
1829 }
1830
/*
 * Release a reference on a client entry by releasing its database entry.
 */
void
rfs4_client_rele(rfs4_client_t *cp)
{
	rfs4_dbe_rele(cp->rc_dbe);
}
1836
1837 rfs4_client_t *
1838 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1839 {
1840 rfs4_client_t *cp;
1841 nfs4_srv_t *nsrv4;
1842 nsrv4 = nfs4_get_srv();
1843
1844
1845 if (oldcp) {
1846 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1847 rfs4_dbe_hide(oldcp->rc_dbe);
1848 } else {
1849 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1850 }
1851
1852 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1853 create, (void *)client, RFS4_DBS_VALID);
1854
1855 if (oldcp)
1856 rfs4_dbe_unhide(oldcp->rc_dbe);
1857
1858 rw_exit(&nsrv4->rfs4_findclient_lock);
1859
1860 return (cp);
1861 }
1862
1863 rfs4_client_t *
1864 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1865 {
1866 rfs4_client_t *cp;
1867 bool_t create = FALSE;
1868 cid *cidp = (cid *)&clientid;
1869 nfs4_srv_t *nsrv4 = nfs4_get_srv();
1870
1871 /* If we're a cluster and the nodeid isn't right, short-circuit */
1872 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1873 return (NULL);
1874
1875 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1876
1877 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1878 &create, NULL, RFS4_DBS_VALID);
1879
1880 rw_exit(&nsrv4->rfs4_findclient_lock);
1881
1882 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1883 rfs4_client_rele(cp);
1884 return (NULL);
1885 } else {
1886 return (cp);
1887 }
1888 }
1889
1890 static uint32_t
1891 clntip_hash(void *key)
1892 {
1893 struct sockaddr *addr = key;
1894 int i, len = 0;
1895 uint32_t hash = 0;
1896 char *ptr;
1897
1898 if (addr->sa_family == AF_INET) {
1899 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1900 len = sizeof (struct in_addr);
1901 ptr = (char *)&a->sin_addr;
1902 } else if (addr->sa_family == AF_INET6) {
1903 struct sockaddr_in6 *a = (struct sockaddr_in6 *)addr;
1904 len = sizeof (struct in6_addr);
1905 ptr = (char *)&a->sin6_addr;
1906 } else
1907 return (0);
1908
1909 for (i = 0; i < len; i++) {
1910 hash <<= 1;
1911 hash += (uint_t)ptr[i];
1912 }
1913 return (hash);
1914 }
1915
1916 static bool_t
1917 clntip_compare(rfs4_entry_t entry, void *key)
1918 {
1919 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1920 struct sockaddr *addr = key;
1921 int len = 0;
1922 char *p1, *p2;
1923
1924 if (addr->sa_family == AF_INET) {
1925 struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
1926 struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
1927 len = sizeof (struct in_addr);
1928 p1 = (char *)&a1->sin_addr;
1929 p2 = (char *)&a2->sin_addr;
1930 } else if (addr->sa_family == AF_INET6) {
1931 struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
1932 struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
1933 len = sizeof (struct in6_addr);
1934 p1 = (char *)&a1->sin6_addr;
1935 p2 = (char *)&a2->sin6_addr;
1936 } else
1937 return (0);
1938
1939 return (bcmp(p1, p2, len) == 0);
1940 }
1941
1942 static void *
1943 clntip_mkkey(rfs4_entry_t entry)
1944 {
1945 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1946
1947 return (&cp->ri_addr);
1948 }
1949
1950 static bool_t
1951 rfs4_clntip_expiry(rfs4_entry_t u_entry)
1952 {
1953 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1954
1955 if (rfs4_dbe_is_invalid(cp->ri_dbe))
1956 return (TRUE);
1957 return (FALSE);
1958 }
1959
/*
 * Destroy routine of the ClntIP table.  Intentionally empty: it does nothing
 * with the entry.
 */
/* ARGSUSED */
static void
rfs4_clntip_destroy(rfs4_entry_t u_entry)
{
}
1965
1966 static bool_t
1967 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
1968 {
1969 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1970 struct sockaddr *ca = (struct sockaddr *)arg;
1971
1972 /* Copy client's IP address */
1973 if (ca->sa_family == AF_INET)
1974 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1975 else if (ca->sa_family == AF_INET6)
1976 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1977 else
1978 return (FALSE);
1979 cp->ri_no_referrals = 1;
1980
1981 return (TRUE);
1982 }
1983
1984 rfs4_clntip_t *
1985 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1986 {
1987 rfs4_clntip_t *cp;
1988 nfs4_srv_t *nsrv4;
1989
1990 nsrv4 = nfs4_get_srv();
1991
1992 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1993
1994 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1995 create, addr, RFS4_DBS_VALID);
1996
1997 rw_exit(&nsrv4->rfs4_findclient_lock);
1998
1999 return (cp);
2000 }
2001
2002 void
2003 rfs4_invalidate_clntip(struct sockaddr *addr)
2004 {
2005 rfs4_clntip_t *cp;
2006 bool_t create = FALSE;
2007 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2008
2009 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2010
2011 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2012 &create, NULL, RFS4_DBS_VALID);
2013 if (cp == NULL) {
2014 rw_exit(&nsrv4->rfs4_findclient_lock);
2015 return;
2016 }
2017 rfs4_dbe_invalidate(cp->ri_dbe);
2018 rfs4_dbe_rele(cp->ri_dbe);
2019
2020 rw_exit(&nsrv4->rfs4_findclient_lock);
2021 }
2022
2023 bool_t
2024 rfs4_lease_expired(rfs4_client_t *cp)
2025 {
2026 bool_t rc;
2027
2028 rfs4_dbe_lock(cp->rc_dbe);
2029
2030 /*
2031 * If the admin has executed clear_locks for this
2032 * client id, force expire will be set, so no need
2033 * to calculate anything because it's "outa here".
2034 */
2035 if (cp->rc_forced_expire) {
2036 rc = TRUE;
2037 } else {
2038 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2039 }
2040
2041 /*
2042 * If the lease has expired we will also want
2043 * to remove any stable storage state data. So
2044 * mark the client id accordingly.
2045 */
2046 if (!cp->rc_ss_remove)
2047 cp->rc_ss_remove = (rc == TRUE);
2048
2049 rfs4_dbe_unlock(cp->rc_dbe);
2050
2051 return (rc);
2052 }
2053
2054 void
2055 rfs4_update_lease(rfs4_client_t *cp)
2056 {
2057 rfs4_dbe_lock(cp->rc_dbe);
2058 if (!cp->rc_forced_expire)
2059 cp->rc_last_access = gethrestime_sec();
2060 rfs4_dbe_unlock(cp->rc_dbe);
2061 }
2062
2063
2064 static bool_t
2065 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
2066 {
2067 bool_t rc;
2068
2069 if (a->clientid != b->clientid)
2070 return (FALSE);
2071
2072 if (a->owner_len != b->owner_len)
2073 return (FALSE);
2074
2075 rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
2076
2077 return (rc);
2078 }
2079
2080 static uint_t
2081 openowner_hash(void *key)
2082 {
2083 int i;
2084 open_owner4 *openowner = key;
2085 uint_t hash = 0;
2086
2087 for (i = 0; i < openowner->owner_len; i++) {
2088 hash <<= 4;
2089 hash += (uint_t)openowner->owner_val[i];
2090 }
2091 hash += (uint_t)openowner->clientid;
2092 hash |= (openowner->clientid >> 32);
2093
2094 return (hash);
2095 }
2096
2097 static bool_t
2098 openowner_compare(rfs4_entry_t u_entry, void *key)
2099 {
2100 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2101 open_owner4 *arg = key;
2102
2103 return (EQOPENOWNER(&oo->ro_owner, arg));
2104 }
2105
2106 void *
2107 openowner_mkkey(rfs4_entry_t u_entry)
2108 {
2109 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2110
2111 return (&oo->ro_owner);
2112 }
2113
/*
 * Expiry routine of the OpenOwner table: always reports the entry as
 * expired, without looking at it.
 */
/* ARGSUSED */
static bool_t
rfs4_openowner_expiry(rfs4_entry_t u_entry)
{
	/* openstateid held us and did all needed delay */
	return (TRUE);
}
2121
2122 static void
2123 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2124 {
2125 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2126
2127 /* Remove open owner from client's lists of open owners */
2128 rfs4_dbe_lock(oo->ro_client->rc_dbe);
2129 list_remove(&oo->ro_client->rc_openownerlist, oo);
2130 rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2131
2132 /* One less reference to the client */
2133 rfs4_client_rele(oo->ro_client);
2134 oo->ro_client = NULL;
2135
2136 /* Free the last reply for this lock owner */
2137 rfs4_free_reply(&oo->ro_reply);
2138
2139 if (oo->ro_reply_fh.nfs_fh4_val) {
2140 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2141 oo->ro_reply_fh.nfs_fh4_len);
2142 oo->ro_reply_fh.nfs_fh4_val = NULL;
2143 oo->ro_reply_fh.nfs_fh4_len = 0;
2144 }
2145
2146 rfs4_sw_destroy(&oo->ro_sw);
2147 list_destroy(&oo->ro_statelist);
2148
2149 /* Free the lock owner id */
2150 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2151 }
2152
/*
 * Release a reference on an open owner by releasing its database entry.
 */
void
rfs4_openowner_rele(rfs4_openowner_t *oo)
{
	rfs4_dbe_rele(oo->ro_dbe);
}
2158
2159 static bool_t
2160 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2161 {
2162 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2163 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2164 open_owner4 *openowner = &argp->ro_owner;
2165 seqid4 seqid = argp->ro_open_seqid;
2166 rfs4_client_t *cp;
2167 bool_t create = FALSE;
2168 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2169
2170 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2171
2172 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2173 &openowner->clientid,
2174 &create, NULL, RFS4_DBS_VALID);
2175
2176 rw_exit(&nsrv4->rfs4_findclient_lock);
2177
2178 if (cp == NULL)
2179 return (FALSE);
2180
2181 oo->ro_reply_fh.nfs_fh4_len = 0;
2182 oo->ro_reply_fh.nfs_fh4_val = NULL;
2183
2184 oo->ro_owner.clientid = openowner->clientid;
2185 oo->ro_owner.owner_val =
2186 kmem_alloc(openowner->owner_len, KM_SLEEP);
2187
2188 bcopy(openowner->owner_val,
2189 oo->ro_owner.owner_val, openowner->owner_len);
2190
2191 oo->ro_owner.owner_len = openowner->owner_len;
2192
2193 oo->ro_need_confirm = TRUE;
2194
2195 rfs4_sw_init(&oo->ro_sw);
2196
2197 oo->ro_open_seqid = seqid;
2198 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2199 oo->ro_client = cp;
2200 oo->ro_cr_set = NULL;
2201
2202 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2203 offsetof(rfs4_state_t, rs_node));
2204
2205 /* Insert openowner into client's open owner list */
2206 rfs4_dbe_lock(cp->rc_dbe);
2207 list_insert_tail(&cp->rc_openownerlist, oo);
2208 rfs4_dbe_unlock(cp->rc_dbe);
2209
2210 return (TRUE);
2211 }
2212
2213 rfs4_openowner_t *
2214 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2215 {
2216 rfs4_openowner_t *oo;
2217 rfs4_openowner_t arg;
2218 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2219
2220 arg.ro_owner = *openowner;
2221 arg.ro_open_seqid = seqid;
2222 /* CSTYLED */
2223 oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2224 create, &arg, RFS4_DBS_VALID);
2225
2226 return (oo);
2227 }
2228
2229 void
2230 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2231 {
2232
2233 rfs4_dbe_lock(oo->ro_dbe);
2234
2235 oo->ro_open_seqid++;
2236
2237 rfs4_dbe_unlock(oo->ro_dbe);
2238 }
2239
2240 void
2241 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2242 {
2243
2244 rfs4_dbe_lock(oo->ro_dbe);
2245
2246 rfs4_free_reply(&oo->ro_reply);
2247
2248 rfs4_copy_reply(&oo->ro_reply, resp);
2249
2250 /* Save the filehandle if provided and free if not used */
2251 if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2252 fh && fh->nfs_fh4_len) {
2253 if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2254 oo->ro_reply_fh.nfs_fh4_val =
2255 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2256 nfs_fh4_copy(fh, &oo->ro_reply_fh);
2257 } else {
2258 if (oo->ro_reply_fh.nfs_fh4_val) {
2259 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2260 oo->ro_reply_fh.nfs_fh4_len);
2261 oo->ro_reply_fh.nfs_fh4_val = NULL;
2262 oo->ro_reply_fh.nfs_fh4_len = 0;
2263 }
2264 }
2265
2266 rfs4_dbe_unlock(oo->ro_dbe);
2267 }
2268
2269 static bool_t
2270 lockowner_compare(rfs4_entry_t u_entry, void *key)
2271 {
2272 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2273 lock_owner4 *b = (lock_owner4 *)key;
2274
2275 if (lo->rl_owner.clientid != b->clientid)
2276 return (FALSE);
2277
2278 if (lo->rl_owner.owner_len != b->owner_len)
2279 return (FALSE);
2280
2281 return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2282 lo->rl_owner.owner_len) == 0);
2283 }
2284
2285 void *
2286 lockowner_mkkey(rfs4_entry_t u_entry)
2287 {
2288 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2289
2290 return (&lo->rl_owner);
2291 }
2292
2293 static uint32_t
2294 lockowner_hash(void *key)
2295 {
2296 int i;
2297 lock_owner4 *lockowner = key;
2298 uint_t hash = 0;
2299
2300 for (i = 0; i < lockowner->owner_len; i++) {
2301 hash <<= 4;
2302 hash += (uint_t)lockowner->owner_val[i];
2303 }
2304 hash += (uint_t)lockowner->clientid;
2305 hash |= (lockowner->clientid >> 32);
2306
2307 return (hash);
2308 }
2309
/*
 * Hash a pid key.  The key is the pid value itself carried in a pointer
 * (see pid_mkkey()), so the hash is simply that value narrowed to
 * 32 bits.
 */
static uint32_t
pid_hash(void *key)
{
	uintptr_t raw = (uintptr_t)key;

	return ((uint32_t)raw);
}
2315
2316 static void *
2317 pid_mkkey(rfs4_entry_t u_entry)
2318 {
2319 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2320
2321 return ((void *)(uintptr_t)lo->rl_pid);
2322 }
2323
2324 static bool_t
2325 pid_compare(rfs4_entry_t u_entry, void *key)
2326 {
2327 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2328
2329 return (lo->rl_pid == (pid_t)(uintptr_t)key);
2330 }
2331
2332 static void
2333 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2334 {
2335 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2336
2337 /* Free the lock owner id */
2338 kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2339 rfs4_client_rele(lo->rl_client);
2340 }
2341
2342 void
2343 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2344 {
2345 rfs4_dbe_rele(lo->rl_dbe);
2346 }
2347
2348 /* ARGSUSED */
2349 static bool_t
2350 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2351 {
2352 /*
2353 * Since expiry is called with no other references on
2354 * this struct, go ahead and have it removed.
2355 */
2356 return (TRUE);
2357 }
2358
2359 static bool_t
2360 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2361 {
2362 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2363 lock_owner4 *lockowner = (lock_owner4 *)arg;
2364 rfs4_client_t *cp;
2365 bool_t create = FALSE;
2366 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2367
2368 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2369
2370 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2371 &lockowner->clientid,
2372 &create, NULL, RFS4_DBS_VALID);
2373
2374 rw_exit(&nsrv4->rfs4_findclient_lock);
2375
2376 if (cp == NULL)
2377 return (FALSE);
2378
2379 /* Reference client */
2380 lo->rl_client = cp;
2381 lo->rl_owner.clientid = lockowner->clientid;
2382 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2383 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2384 lockowner->owner_len);
2385 lo->rl_owner.owner_len = lockowner->owner_len;
2386 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2387
2388 return (TRUE);
2389 }
2390
2391 rfs4_lockowner_t *
2392 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2393 {
2394 rfs4_lockowner_t *lo;
2395 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2396
2397 /* CSTYLED */
2398 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2399 create, lockowner, RFS4_DBS_VALID);
2400
2401 return (lo);
2402 }
2403
2404 rfs4_lockowner_t *
2405 rfs4_findlockowner_by_pid(pid_t pid)
2406 {
2407 rfs4_lockowner_t *lo;
2408 bool_t create = FALSE;
2409 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2410
2411 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2412 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2413
2414 return (lo);
2415 }
2416
2417
2418 static uint32_t
2419 file_hash(void *key)
2420 {
2421 return (ADDRHASH(key));
2422 }
2423
2424 static void *
2425 file_mkkey(rfs4_entry_t u_entry)
2426 {
2427 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2428
2429 return (fp->rf_vp);
2430 }
2431
2432 static bool_t
2433 file_compare(rfs4_entry_t u_entry, void *key)
2434 {
2435 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2436
2437 return (fp->rf_vp == (vnode_t *)key);
2438 }
2439
2440 static void
2441 rfs4_file_destroy(rfs4_entry_t u_entry)
2442 {
2443 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2444
2445 list_destroy(&fp->rf_delegstatelist);
2446
2447 if (fp->rf_filehandle.nfs_fh4_val)
2448 kmem_free(fp->rf_filehandle.nfs_fh4_val,
2449 fp->rf_filehandle.nfs_fh4_len);
2450 cv_destroy(fp->rf_dinfo.rd_recall_cv);
2451 if (fp->rf_vp) {
2452 vnode_t *vp = fp->rf_vp;
2453
2454 mutex_enter(&vp->v_vsd_lock);
2455 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
2456 mutex_exit(&vp->v_vsd_lock);
2457 VN_RELE(vp);
2458 fp->rf_vp = NULL;
2459 }
2460 rw_destroy(&fp->rf_file_rwlock);
2461 }
2462
2463 /*
2464 * Used to unlock the underlying dbe struct only
2465 */
2466 void
2467 rfs4_file_rele(rfs4_file_t *fp)
2468 {
2469 rfs4_dbe_rele(fp->rf_dbe);
2470 }
2471
2472 typedef struct {
2473 vnode_t *vp;
2474 nfs_fh4 *fh;
2475 } rfs4_fcreate_arg;
2476
2477 static bool_t
2478 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2479 {
2480 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2481 rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2482 vnode_t *vp = ap->vp;
2483 nfs_fh4 *fh = ap->fh;
2484
2485 VN_HOLD(vp);
2486
2487 fp->rf_filehandle.nfs_fh4_len = 0;
2488 fp->rf_filehandle.nfs_fh4_val = NULL;
2489 ASSERT(fh && fh->nfs_fh4_len);
2490 if (fh && fh->nfs_fh4_len) {
2491 fp->rf_filehandle.nfs_fh4_val =
2492 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2493 nfs_fh4_copy(fh, &fp->rf_filehandle);
2494 }
2495 fp->rf_vp = vp;
2496
2497 list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2498 offsetof(rfs4_deleg_state_t, rds_node));
2499
2500 fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2501 fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2502
2503 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2504 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2505
2506 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2507
2508 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2509
2510 mutex_enter(&vp->v_vsd_lock);
2511 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2512 mutex_exit(&vp->v_vsd_lock);
2513
2514 return (TRUE);
2515 }
2516
2517 rfs4_file_t *
2518 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2519 {
2520 rfs4_file_t *fp;
2521 rfs4_fcreate_arg arg;
2522 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2523
2524 arg.vp = vp;
2525 arg.fh = fh;
2526
2527 if (*create == TRUE)
2528 /* CSTYLED */
2529 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2530 &arg, RFS4_DBS_VALID);
2531 else {
2532 mutex_enter(&vp->v_vsd_lock);
2533 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2534 if (fp) {
2535 rfs4_dbe_lock(fp->rf_dbe);
2536 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2537 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2538 rfs4_dbe_unlock(fp->rf_dbe);
2539 fp = NULL;
2540 } else {
2541 rfs4_dbe_hold(fp->rf_dbe);
2542 rfs4_dbe_unlock(fp->rf_dbe);
2543 }
2544 }
2545 mutex_exit(&vp->v_vsd_lock);
2546 }
2547 return (fp);
2548 }
2549
2550 /*
2551 * Find a file in the db and once it is located, take the rw lock.
2552 * Need to check the vnode pointer and if it does not exist (it was
2553 * removed between the db location and check) redo the find. This
2554 * assumes that a file struct that has a NULL vnode pointer is marked
2555 * at 'invalid' and will not be found in the db the second time
2556 * around.
2557 */
2558 rfs4_file_t *
2559 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2560 {
2561 rfs4_file_t *fp;
2562 rfs4_fcreate_arg arg;
2563 bool_t screate = *create;
2564 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2565
2566 if (screate == FALSE) {
2567 mutex_enter(&vp->v_vsd_lock);
2568 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2569 if (fp) {
2570 rfs4_dbe_lock(fp->rf_dbe);
2571 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2572 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2573 rfs4_dbe_unlock(fp->rf_dbe);
2574 mutex_exit(&vp->v_vsd_lock);
2575 fp = NULL;
2576 } else {
2577 rfs4_dbe_hold(fp->rf_dbe);
2578 rfs4_dbe_unlock(fp->rf_dbe);
2579 mutex_exit(&vp->v_vsd_lock);
2580 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2581 if (fp->rf_vp == NULL) {
2582 rw_exit(&fp->rf_file_rwlock);
2583 rfs4_file_rele(fp);
2584 fp = NULL;
2585 }
2586 }
2587 } else {
2588 mutex_exit(&vp->v_vsd_lock);
2589 }
2590 } else {
2591 retry:
2592 arg.vp = vp;
2593 arg.fh = fh;
2594
2595 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2596 create, &arg, RFS4_DBS_VALID);
2597 if (fp != NULL) {
2598 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2599 if (fp->rf_vp == NULL) {
2600 rw_exit(&fp->rf_file_rwlock);
2601 rfs4_file_rele(fp);
2602 *create = screate;
2603 goto retry;
2604 }
2605 }
2606 }
2607
2608 return (fp);
2609 }
2610
2611 static uint32_t
2612 lo_state_hash(void *key)
2613 {
2614 stateid_t *id = key;
2615
2616 return (id->bits.ident+id->bits.pid);
2617 }
2618
2619 static bool_t
2620 lo_state_compare(rfs4_entry_t u_entry, void *key)
2621 {
2622 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2623 stateid_t *id = key;
2624 bool_t rc;
2625
2626 rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2627 lsp->rls_lockid.bits.type == id->bits.type &&
2628 lsp->rls_lockid.bits.ident == id->bits.ident &&
2629 lsp->rls_lockid.bits.pid == id->bits.pid);
2630
2631 return (rc);
2632 }
2633
2634 static void *
2635 lo_state_mkkey(rfs4_entry_t u_entry)
2636 {
2637 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2638
2639 return (&lsp->rls_lockid);
2640 }
2641
2642 static bool_t
2643 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2644 {
2645 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2646
2647 if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2648 return (TRUE);
2649 if (lsp->rls_state->rs_closed)
2650 return (TRUE);
2651 return ((gethrestime_sec() -
2652 lsp->rls_state->rs_owner->ro_client->rc_last_access
2653 > rfs4_lease_time));
2654 }
2655
2656 static void
2657 rfs4_lo_state_destroy(rfs4_entry_t u_entry)
2658 {
2659 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2660
2661 rfs4_dbe_lock(lsp->rls_state->rs_dbe);
2662 list_remove(&lsp->rls_state->rs_lostatelist, lsp);
2663 rfs4_dbe_unlock(lsp->rls_state->rs_dbe);
2664
2665 rfs4_sw_destroy(&lsp->rls_sw);
2666
2667 /* Make sure to release the file locks */
2668 if (lsp->rls_locks_cleaned == FALSE) {
2669 lsp->rls_locks_cleaned = TRUE;
2670 if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
2671 /* Is the PxFS kernel module loaded? */
2672 if (lm_remove_file_locks != NULL) {
2673 int new_sysid;
2674
2675 /* Encode the cluster nodeid in new sysid */
2676 new_sysid =
2677 lsp->rls_locker->rl_client->rc_sysidt;
2678 lm_set_nlmid_flk(&new_sysid);
2679
2680 /*
2681 * This PxFS routine removes file locks for a
2682 * client over all nodes of a cluster.
2683 */
2684 DTRACE_PROBE1(nfss_i_clust_rm_lck,
2685 int, new_sysid);
2686 (*lm_remove_file_locks)(new_sysid);
2687 } else {
2688 (void) cleanlocks(
2689 lsp->rls_state->rs_finfo->rf_vp,
2690 lsp->rls_locker->rl_pid,
2691 lsp->rls_locker->rl_client->rc_sysidt);
2692 }
2693 }
2694 }
2695
2696 /* Free the last reply for this state */
2697 rfs4_free_reply(&lsp->rls_reply);
2698
2699 rfs4_lockowner_rele(lsp->rls_locker);
2700 lsp->rls_locker = NULL;
2701
2702 rfs4_state_rele_nounlock(lsp->rls_state);
2703 lsp->rls_state = NULL;
2704 }
2705
2706 static bool_t
2707 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2708 {
2709 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2710 rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2711 rfs4_lockowner_t *lo = argp->rls_locker;
2712 rfs4_state_t *sp = argp->rls_state;
2713
2714 lsp->rls_state = sp;
2715
2716 lsp->rls_lockid = sp->rs_stateid;
2717 lsp->rls_lockid.bits.type = LOCKID;
2718 lsp->rls_lockid.bits.chgseq = 0;
2719 lsp->rls_lockid.bits.pid = lo->rl_pid;
2720
2721 lsp->rls_locks_cleaned = FALSE;
2722 lsp->rls_lock_completed = FALSE;
2723
2724 rfs4_sw_init(&lsp->rls_sw);
2725
2726 /* Attached the supplied lock owner */
2727 rfs4_dbe_hold(lo->rl_dbe);
2728 lsp->rls_locker = lo;
2729
2730 rfs4_dbe_lock(sp->rs_dbe);
2731 list_insert_tail(&sp->rs_lostatelist, lsp);
2732 rfs4_dbe_hold(sp->rs_dbe);
2733 rfs4_dbe_unlock(sp->rs_dbe);
2734
2735 return (TRUE);
2736 }
2737
2738 void
2739 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2740 {
2741 if (unlock_fp == TRUE)
2742 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2743 rfs4_dbe_rele(lsp->rls_dbe);
2744 }
2745
2746 static rfs4_lo_state_t *
2747 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2748 {
2749 rfs4_lo_state_t *lsp;
2750 bool_t create = FALSE;
2751 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2752
2753 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2754 &create, NULL, RFS4_DBS_VALID);
2755 if (lock_fp == TRUE && lsp != NULL)
2756 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2757
2758 return (lsp);
2759 }
2760
2761
2762 static uint32_t
2763 lo_state_lo_hash(void *key)
2764 {
2765 rfs4_lo_state_t *lsp = key;
2766
2767 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2768 }
2769
2770 static bool_t
2771 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2772 {
2773 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2774 rfs4_lo_state_t *keyp = key;
2775
2776 return (keyp->rls_locker == lsp->rls_locker &&
2777 keyp->rls_state == lsp->rls_state);
2778 }
2779
2780 static void *
2781 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2782 {
2783 return (u_entry);
2784 }
2785
2786 rfs4_lo_state_t *
2787 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2788 bool_t *create)
2789 {
2790 rfs4_lo_state_t *lsp;
2791 rfs4_lo_state_t arg;
2792 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2793
2794 arg.rls_locker = lo;
2795 arg.rls_state = sp;
2796
2797 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2798 &arg, create, &arg, RFS4_DBS_VALID);
2799
2800 return (lsp);
2801 }
2802
2803 static stateid_t
2804 get_stateid(id_t eid)
2805 {
2806 stateid_t id;
2807 nfs4_srv_t *nsrv4;
2808
2809 nsrv4 = nfs4_get_srv();
2810
2811 id.bits.boottime = nsrv4->rfs4_start_time;
2812 id.bits.ident = eid;
2813 id.bits.chgseq = 0;
2814 id.bits.type = 0;
2815 id.bits.pid = 0;
2816
2817 /*
2818 * If we are booted as a cluster node, embed our nodeid.
2819 * We've already done sanity checks in rfs4_client_create() so no
2820 * need to repeat them here.
2821 */
2822 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2823 clconf_get_nodeid() : 0;
2824
2825 return (id);
2826 }
2827
2828 /*
2829 * For use only when booted as a cluster node.
2830 * Returns TRUE if the embedded nodeid indicates that this stateid was
2831 * generated on another node.
2832 */
2833 static int
2834 foreign_stateid(stateid_t *id)
2835 {
2836 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2837 return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2838 }
2839
2840 /*
2841 * For use only when booted as a cluster node.
2842 * Returns TRUE if the embedded nodeid indicates that this clientid was
2843 * generated on another node.
2844 */
2845 static int
2846 foreign_clientid(cid *cidp)
2847 {
2848 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2849 return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2850 (uint32_t)clconf_get_nodeid());
2851 }
2852
2853 /*
2854 * For use only when booted as a cluster node.
2855 * Embed our cluster nodeid into the clientid.
2856 */
2857 static void
2858 embed_nodeid(cid *cidp)
2859 {
2860 int clnodeid;
2861 /*
2862 * Currently, our state tables are small enough that their
2863 * ids will leave enough bits free for the nodeid. If the
2864 * tables become larger, we mustn't overwrite the id.
2865 * Equally, we only have room for so many bits of nodeid, so
2866 * must check that too.
2867 */
2868 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2869 ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2870 clnodeid = clconf_get_nodeid();
2871 ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2872 ASSERT(clnodeid != NODEID_UNKNOWN);
2873 cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2874 }
2875
2876 static uint32_t
2877 state_hash(void *key)
2878 {
2879 stateid_t *ip = (stateid_t *)key;
2880
2881 return (ip->bits.ident);
2882 }
2883
2884 static bool_t
2885 state_compare(rfs4_entry_t u_entry, void *key)
2886 {
2887 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2888 stateid_t *id = (stateid_t *)key;
2889 bool_t rc;
2890
2891 rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2892 sp->rs_stateid.bits.ident == id->bits.ident);
2893
2894 return (rc);
2895 }
2896
2897 static void *
2898 state_mkkey(rfs4_entry_t u_entry)
2899 {
2900 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2901
2902 return (&sp->rs_stateid);
2903 }
2904
2905 static void
2906 rfs4_state_destroy(rfs4_entry_t u_entry)
2907 {
2908 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2909
2910 /* remove from openowner list */
2911 rfs4_dbe_lock(sp->rs_owner->ro_dbe);
2912 list_remove(&sp->rs_owner->ro_statelist, sp);
2913 rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
2914
2915 list_destroy(&sp->rs_lostatelist);
2916
2917 /* release any share locks for this stateid if it's still open */
2918 if (!sp->rs_closed) {
2919 rfs4_dbe_lock(sp->rs_dbe);
2920 (void) rfs4_unshare(sp);
2921 rfs4_dbe_unlock(sp->rs_dbe);
2922 }
2923
2924 /* Were done with the file */
2925 rfs4_file_rele(sp->rs_finfo);
2926 sp->rs_finfo = NULL;
2927
2928 /* And now with the openowner */
2929 rfs4_openowner_rele(sp->rs_owner);
2930 sp->rs_owner = NULL;
2931 }
2932
2933 static void
2934 rfs4_state_rele_nounlock(rfs4_state_t *sp)
2935 {
2936 rfs4_dbe_rele(sp->rs_dbe);
2937 }
2938
2939 void
2940 rfs4_state_rele(rfs4_state_t *sp)
2941 {
2942 rw_exit(&sp->rs_finfo->rf_file_rwlock);
2943 rfs4_dbe_rele(sp->rs_dbe);
2944 }
2945
2946 static uint32_t
2947 deleg_hash(void *key)
2948 {
2949 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
2950
2951 return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
2952 }
2953
2954 static bool_t
2955 deleg_compare(rfs4_entry_t u_entry, void *key)
2956 {
2957 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2958 rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
2959
2960 return (dsp->rds_client == kdsp->rds_client &&
2961 dsp->rds_finfo == kdsp->rds_finfo);
2962 }
2963
2964 static void *
2965 deleg_mkkey(rfs4_entry_t u_entry)
2966 {
2967 return (u_entry);
2968 }
2969
2970 static uint32_t
2971 deleg_state_hash(void *key)
2972 {
2973 stateid_t *ip = (stateid_t *)key;
2974
2975 return (ip->bits.ident);
2976 }
2977
2978 static bool_t
2979 deleg_state_compare(rfs4_entry_t u_entry, void *key)
2980 {
2981 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2982 stateid_t *id = (stateid_t *)key;
2983 bool_t rc;
2984
2985 if (id->bits.type != DELEGID)
2986 return (FALSE);
2987
2988 rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
2989 dsp->rds_delegid.bits.ident == id->bits.ident);
2990
2991 return (rc);
2992 }
2993
2994 static void *
2995 deleg_state_mkkey(rfs4_entry_t u_entry)
2996 {
2997 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2998
2999 return (&dsp->rds_delegid);
3000 }
3001
3002 static bool_t
3003 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
3004 {
3005 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3006
3007 if (rfs4_dbe_is_invalid(dsp->rds_dbe))
3008 return (TRUE);
3009
3010 if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
3011 return (TRUE);
3012
3013 if ((gethrestime_sec() - dsp->rds_client->rc_last_access
3014 > rfs4_lease_time)) {
3015 rfs4_dbe_invalidate(dsp->rds_dbe);
3016 return (TRUE);
3017 }
3018
3019 return (FALSE);
3020 }
3021
3022 static bool_t
3023 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
3024 {
3025 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3026 rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
3027 rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
3028
3029 rfs4_dbe_hold(fp->rf_dbe);
3030 rfs4_dbe_hold(cp->rc_dbe);
3031
3032 dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
3033 dsp->rds_delegid.bits.type = DELEGID;
3034 dsp->rds_finfo = fp;
3035 dsp->rds_client = cp;
3036 dsp->rds_dtype = OPEN_DELEGATE_NONE;
3037
3038 dsp->rds_time_granted = gethrestime_sec(); /* observability */
3039 dsp->rds_time_revoked = 0;
3040
3041 list_link_init(&dsp->rds_node);
3042
3043 return (TRUE);
3044 }
3045
3046 static void
3047 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3048 {
3049 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3050
3051 /* return delegation if necessary */
3052 rfs4_return_deleg(dsp, FALSE);
3053
3054 /* Were done with the file */
3055 rfs4_file_rele(dsp->rds_finfo);
3056 dsp->rds_finfo = NULL;
3057
3058 /* And now with the openowner */
3059 rfs4_client_rele(dsp->rds_client);
3060 dsp->rds_client = NULL;
3061 }
3062
3063 rfs4_deleg_state_t *
3064 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3065 {
3066 rfs4_deleg_state_t ds, *dsp;
3067 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3068
3069 ds.rds_client = sp->rs_owner->ro_client;
3070 ds.rds_finfo = sp->rs_finfo;
3071
3072 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3073 create, &ds, RFS4_DBS_VALID);
3074
3075 return (dsp);
3076 }
3077
3078 rfs4_deleg_state_t *
3079 rfs4_finddelegstate(stateid_t *id)
3080 {
3081 rfs4_deleg_state_t *dsp;
3082 bool_t create = FALSE;
3083 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3084
3085 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3086 id, &create, NULL, RFS4_DBS_VALID);
3087
3088 return (dsp);
3089 }
3090
3091 void
3092 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3093 {
3094 rfs4_dbe_rele(dsp->rds_dbe);
3095 }
3096
3097 void
3098 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3099 {
3100
3101 rfs4_dbe_lock(lsp->rls_dbe);
3102
3103 /*
3104 * If we are skipping sequence id checking, this means that
3105 * this is the first lock request and therefore the sequence
3106 * id does not need to be updated. This only happens on the
3107 * first lock request for a lockowner
3108 */
3109 if (!lsp->rls_skip_seqid_check)
3110 lsp->rls_seqid++;
3111
3112 rfs4_dbe_unlock(lsp->rls_dbe);
3113 }
3114
3115 void
3116 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3117 {
3118
3119 rfs4_dbe_lock(lsp->rls_dbe);
3120
3121 rfs4_free_reply(&lsp->rls_reply);
3122
3123 rfs4_copy_reply(&lsp->rls_reply, resp);
3124
3125 rfs4_dbe_unlock(lsp->rls_dbe);
3126 }
3127
3128 void
3129 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3130 bool_t close_of_client)
3131 {
3132 rfs4_state_t *sp;
3133
3134 rfs4_dbe_lock(oo->ro_dbe);
3135
3136 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3137 sp = list_next(&oo->ro_statelist, sp)) {
3138 rfs4_state_close(sp, FALSE, close_of_client, CRED());
3139 if (invalidate == TRUE)
3140 rfs4_dbe_invalidate(sp->rs_dbe);
3141 }
3142
3143 rfs4_dbe_invalidate(oo->ro_dbe);
3144 rfs4_dbe_unlock(oo->ro_dbe);
3145 }
3146
3147 static uint32_t
3148 state_owner_file_hash(void *key)
3149 {
3150 rfs4_state_t *sp = key;
3151
3152 return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3153 }
3154
3155 static bool_t
3156 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3157 {
3158 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3159 rfs4_state_t *arg = key;
3160
3161 if (sp->rs_closed == TRUE)
3162 return (FALSE);
3163
3164 return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3165 }
3166
3167 static void *
3168 state_owner_file_mkkey(rfs4_entry_t u_entry)
3169 {
3170 return (u_entry);
3171 }
3172
3173 static uint32_t
3174 state_file_hash(void *key)
3175 {
3176 return (ADDRHASH(key));
3177 }
3178
3179 static bool_t
3180 state_file_compare(rfs4_entry_t u_entry, void *key)
3181 {
3182 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3183 rfs4_file_t *fp = key;
3184
3185 if (sp->rs_closed == TRUE)
3186 return (FALSE);
3187
3188 return (fp == sp->rs_finfo);
3189 }
3190
3191 static void *
3192 state_file_mkkey(rfs4_entry_t u_entry)
3193 {
3194 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3195
3196 return (sp->rs_finfo);
3197 }
3198
3199 rfs4_state_t *
3200 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3201 bool_t *create)
3202 {
3203 rfs4_state_t *sp;
3204 rfs4_state_t key;
3205 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3206
3207 key.rs_owner = oo;
3208 key.rs_finfo = fp;
3209
3210 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3211 &key, create, &key, RFS4_DBS_VALID);
3212
3213 return (sp);
3214 }
3215
3216 /* This returns ANY state struct that refers to this file */
3217 static rfs4_state_t *
3218 rfs4_findstate_by_file(rfs4_file_t *fp)
3219 {
3220 bool_t create = FALSE;
3221 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3222
3223 return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3224 &create, fp, RFS4_DBS_VALID));
3225 }
3226
3227 static bool_t
3228 rfs4_state_expiry(rfs4_entry_t u_entry)
3229 {
3230 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3231
3232 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3233 return (TRUE);
3234
3235 if (sp->rs_closed == TRUE &&
3236 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3237 > rfs4_lease_time))
3238 return (TRUE);
3239
3240 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3241 > rfs4_lease_time));
3242 }
3243
3244 static bool_t
3245 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3246 {
3247 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3248 rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3249 rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3250
3251 rfs4_dbe_hold(fp->rf_dbe);
3252 rfs4_dbe_hold(oo->ro_dbe);
3253 sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3254 sp->rs_stateid.bits.type = OPENID;
3255 sp->rs_owner = oo;
3256 sp->rs_finfo = fp;
3257
3258 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3259 offsetof(rfs4_lo_state_t, rls_node));
3260
3261 /* Insert state on per open owner's list */
3262 rfs4_dbe_lock(oo->ro_dbe);
3263 list_insert_tail(&oo->ro_statelist, sp);
3264 rfs4_dbe_unlock(oo->ro_dbe);
3265
3266 return (TRUE);
3267 }
3268
3269 static rfs4_state_t *
3270 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3271 {
3272 rfs4_state_t *sp;
3273 bool_t create = FALSE;
3274 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3275
3276 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3277 &create, NULL, find_invalid);
3278 if (lock_fp == TRUE && sp != NULL)
3279 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3280
3281 return (sp);
3282 }
3283
3284 void
3285 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3286 cred_t *cr)
3287 {
3288 /* Remove the associated lo_state owners */
3289 if (!lock_held)
3290 rfs4_dbe_lock(sp->rs_dbe);
3291
3292 /*
3293 * If refcnt == 0, the dbe is about to be destroyed.
3294 * lock state will be released by the reaper thread.
3295 */
3296
3297 if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3298 if (sp->rs_closed == FALSE) {
3299 rfs4_release_share_lock_state(sp, cr, close_of_client);
3300 sp->rs_closed = TRUE;
3301 }
3302 }
3303
3304 if (!lock_held)
3305 rfs4_dbe_unlock(sp->rs_dbe);
3306 }
3307
3308 /*
3309 * Remove all state associated with the given client.
3310 */
3311 void
3312 rfs4_client_state_remove(rfs4_client_t *cp)
3313 {
3314 rfs4_openowner_t *oo;
3315
3316 rfs4_dbe_lock(cp->rc_dbe);
3317
3318 for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3319 oo = list_next(&cp->rc_openownerlist, oo)) {
3320 rfs4_free_opens(oo, TRUE, TRUE);
3321 }
3322
3323 rfs4_dbe_unlock(cp->rc_dbe);
3324 }
3325
3326 void
3327 rfs4_client_close(rfs4_client_t *cp)
3328 {
3329 /* Mark client as going away. */
3330 rfs4_dbe_lock(cp->rc_dbe);
3331 rfs4_dbe_invalidate(cp->rc_dbe);
3332 rfs4_dbe_unlock(cp->rc_dbe);
3333
3334 rfs4_client_state_remove(cp);
3335
3336 /* Release the client */
3337 rfs4_client_rele(cp);
3338 }
3339
3340 nfsstat4
3341 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3342 {
3343 cid *cidp = (cid *) cp;
3344 nfs4_srv_t *nsrv4;
3345
3346 nsrv4 = nfs4_get_srv();
3347
3348 /*
3349 * If we are booted as a cluster node, check the embedded nodeid.
3350 * If it indicates that this clientid was generated on another node,
3351 * inform the client accordingly.
3352 */
3353 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3354 return (NFS4ERR_STALE_CLIENTID);
3355
3356 /*
3357 * If the server start time matches the time provided
3358 * by the client (via the clientid) and this is NOT a
3359 * setclientid_confirm then return EXPIRED.
3360 */
3361 if (!setclid_confirm &&
3362 cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3363 return (NFS4ERR_EXPIRED);
3364
3365 return (NFS4ERR_STALE_CLIENTID);
3366 }
3367
3368 /*
3369 * This is used when a stateid has not been found amongst the
3370 * current server's state. Check the stateid to see if it
3371 * was from this server instantiation or not.
3372 */
3373 static nfsstat4
3374 what_stateid_error(stateid_t *id, stateid_type_t type)
3375 {
3376 nfs4_srv_t *nsrv4;
3377
3378 nsrv4 = nfs4_get_srv();
3379
3380 /* If we are booted as a cluster node, was stateid locally generated? */
3381 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3382 return (NFS4ERR_STALE_STATEID);
3383
3384 /* If types don't match then no use checking further */
3385 if (type != id->bits.type)
3386 return (NFS4ERR_BAD_STATEID);
3387
3388 /* From a different server instantiation, return STALE */
3389 if (id->bits.boottime != nsrv4->rfs4_start_time)
3390 return (NFS4ERR_STALE_STATEID);
3391
3392 /*
3393 * From this server but the state is most likely beyond lease
3394 * timeout: return NFS4ERR_EXPIRED. However, there is the
3395 * case of a delegation stateid. For delegations, there is a
3396 * case where the state can be removed without the client's
3397 * knowledge/consent: revocation. In the case of delegation
3398 * revocation, the delegation state will be removed and will
3399 * not be found. If the client does something like a
3400 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3401 * that has been revoked, the server should return BAD_STATEID
3402 * instead of the more common EXPIRED error.
3403 */
3404 if (id->bits.boottime == nsrv4->rfs4_start_time) {
3405 if (type == DELEGID)
3406 return (NFS4ERR_BAD_STATEID);
3407 else
3408 return (NFS4ERR_EXPIRED);
3409 }
3410
3411 return (NFS4ERR_BAD_STATEID);
3412 }
3413
3414 /*
3415 * Used later on to find the various state structs. When called from
3416 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3417 * taken (it is not needed) and helps on the read/write path with
3418 * respect to performance.
3419 */
3420 static nfsstat4
3421 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3422 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3423 {
3424 stateid_t *id = (stateid_t *)stateid;
3425 rfs4_state_t *sp;
3426
3427 *spp = NULL;
3428
3429 /* If we are booted as a cluster node, was stateid locally generated? */
3430 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3431 return (NFS4ERR_STALE_STATEID);
3432
3433 sp = rfs4_findstate(id, find_invalid, lock_fp);
3434 if (sp == NULL) {
3435 return (what_stateid_error(id, OPENID));
3436 }
3437
3438 if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3439 if (lock_fp == TRUE)
3440 rfs4_state_rele(sp);
3441 else
3442 rfs4_state_rele_nounlock(sp);
3443 return (NFS4ERR_EXPIRED);
3444 }
3445
3446 *spp = sp;
3447
3448 return (NFS4_OK);
3449 }
3450
3451 nfsstat4
3452 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3453 rfs4_dbsearch_type_t find_invalid)
3454 {
3455 return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3456 }
3457
3458 int
3459 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid)
3460 {
3461 stateid_t *id = (stateid_t *)stateid;
3462
3463 if (rfs4_lease_expired(sp->rs_owner->ro_client))
3464 return (NFS4_CHECK_STATEID_EXPIRED);
3465
3466 /* Stateid is some time in the future - that's bad */
3467 if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3468 return (NFS4_CHECK_STATEID_BAD);
3469
3470 if (sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1)
3471 return (NFS4_CHECK_STATEID_REPLAY);
3472
3473 /* Stateid is some time in the past - that's old */
3474 if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3475 return (NFS4_CHECK_STATEID_OLD);
3476
3477 /* Caller needs to know about confirmation before closure */
3478 if (sp->rs_owner->ro_need_confirm)
3479 return (NFS4_CHECK_STATEID_UNCONFIRMED);
3480
3481 if (sp->rs_closed == TRUE)
3482 return (NFS4_CHECK_STATEID_CLOSED);
3483
3484 return (NFS4_CHECK_STATEID_OKAY);
3485 }
3486
3487 int
3488 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid)
3489 {
3490 stateid_t *id = (stateid_t *)stateid;
3491
3492 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3493 return (NFS4_CHECK_STATEID_EXPIRED);
3494
3495 /* Stateid is some time in the future - that's bad */
3496 if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3497 return (NFS4_CHECK_STATEID_BAD);
3498
3499 if (lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1)
3500 return (NFS4_CHECK_STATEID_REPLAY);
3501
3502 /* Stateid is some time in the past - that's old */
3503 if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3504 return (NFS4_CHECK_STATEID_OLD);
3505
3506 if (lsp->rls_state->rs_closed == TRUE)
3507 return (NFS4_CHECK_STATEID_CLOSED);
3508
3509 return (NFS4_CHECK_STATEID_OKAY);
3510 }
3511
3512 nfsstat4
3513 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3514 {
3515 stateid_t *id = (stateid_t *)stateid;
3516 rfs4_deleg_state_t *dsp;
3517
3518 *dspp = NULL;
3519
3520 /* If we are booted as a cluster node, was stateid locally generated? */
3521 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3522 return (NFS4ERR_STALE_STATEID);
3523
3524 dsp = rfs4_finddelegstate(id);
3525 if (dsp == NULL) {
3526 return (what_stateid_error(id, DELEGID));
3527 }
3528
3529 if (rfs4_lease_expired(dsp->rds_client)) {
3530 rfs4_deleg_state_rele(dsp);
3531 return (NFS4ERR_EXPIRED);
3532 }
3533
3534 *dspp = dsp;
3535
3536 return (NFS4_OK);
3537 }
3538
3539 nfsstat4
3540 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3541 {
3542 stateid_t *id = (stateid_t *)stateid;
3543 rfs4_lo_state_t *lsp;
3544
3545 *lspp = NULL;
3546
3547 /* If we are booted as a cluster node, was stateid locally generated? */
3548 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3549 return (NFS4ERR_STALE_STATEID);
3550
3551 lsp = rfs4_findlo_state(id, lock_fp);
3552 if (lsp == NULL) {
3553 return (what_stateid_error(id, LOCKID));
3554 }
3555
3556 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3557 rfs4_lo_state_rele(lsp, lock_fp);
3558 return (NFS4ERR_EXPIRED);
3559 }
3560
3561 *lspp = lsp;
3562
3563 return (NFS4_OK);
3564 }
3565
3566 static nfsstat4
3567 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3568 rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3569 {
3570 rfs4_state_t *sp = NULL;
3571 rfs4_deleg_state_t *dsp = NULL;
3572 rfs4_lo_state_t *lsp = NULL;
3573 stateid_t *id;
3574 nfsstat4 status;
3575
3576 *spp = NULL; *dspp = NULL; *lspp = NULL;
3577
3578 id = (stateid_t *)sid;
3579 switch (id->bits.type) {
3580 case OPENID:
3581 status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3582 break;
3583 case DELEGID:
3584 status = rfs4_get_deleg_state(sid, &dsp);
3585 break;
3586 case LOCKID:
3587 status = rfs4_get_lo_state(sid, &lsp, FALSE);
3588 if (status == NFS4_OK) {
3589 sp = lsp->rls_state;
3590 rfs4_dbe_hold(sp->rs_dbe);
3591 }
3592 break;
3593 default:
3594 status = NFS4ERR_BAD_STATEID;
3595 }
3596
3597 if (status == NFS4_OK) {
3598 *spp = sp;
3599 *dspp = dsp;
3600 *lspp = lsp;
3601 }
3602
3603 return (status);
3604 }
3605
3606 /*
3607 * Given the I/O mode (FREAD or FWRITE), this checks whether the
3608 * rfs4_state_t struct has access to do this operation and if so
3609 * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3610 */
3611 nfsstat4
3612 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3613 {
3614 nfsstat4 stat = NFS4_OK;
3615 rfs4_file_t *fp;
3616 bool_t create = FALSE;
3617
3618 rfs4_dbe_lock(sp->rs_dbe);
3619 if (mode == FWRITE) {
3620 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3621 stat = NFS4ERR_OPENMODE;
3622 }
3623 } else if (mode == FREAD) {
3624 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3625 /*
3626 * If we have OPENed the file with DENYing access
3627 * to both READ and WRITE then no one else could
3628 * have OPENed the file, hence no conflicting READ
3629 * deny. This check is merely an optimization.
3630 */
3631 if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3632 goto out;
3633
3634 /* Check against file struct's DENY mode */
3635 fp = rfs4_findfile(vp, NULL, &create);
3636 if (fp != NULL) {
3637 int deny_read = 0;
3638 rfs4_dbe_lock(fp->rf_dbe);
3639 /*
3640 * Check if any other open owner has the file
3641 * OPENed with deny READ.
3642 */
3643 if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3644 deny_read = 1;
3645 ASSERT(fp->rf_deny_read >= deny_read);
3646 if (fp->rf_deny_read > deny_read)
3647 stat = NFS4ERR_OPENMODE;
3648 rfs4_dbe_unlock(fp->rf_dbe);
3649 rfs4_file_rele(fp);
3650 }
3651 }
3652 } else {
3653 /* Illegal I/O mode */
3654 stat = NFS4ERR_INVAL;
3655 }
3656 out:
3657 rfs4_dbe_unlock(sp->rs_dbe);
3658 return (stat);
3659 }
3660
3661 /*
3662 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3663 * the file is being truncated, return NFS4_OK if allowed or appropriate
3664 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3665 * the associated file will be done if the I/O is not consistent with any
3666 * delegation in effect on the file. Should be holding VOP_RWLOCK, either
3667 * as reader or writer as appropriate. rfs4_op_open will acquire the
3668 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
3669 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3670 * deleg parameter, we will return whether a write delegation is held by
3671 * the client associated with this stateid.
3672 * If the server instance associated with the relevant client is in its
3673 * grace period, return NFS4ERR_GRACE.
3674 */
3675
3676 nfsstat4
3677 rfs4_check_stateid(int mode, vnode_t *vp,
3678 stateid4 *stateid, bool_t trunc, bool_t *deleg,
3679 bool_t do_access, caller_context_t *ct)
3680 {
3681 rfs4_file_t *fp;
3682 bool_t create = FALSE;
3683 rfs4_state_t *sp;
3684 rfs4_deleg_state_t *dsp;
3685 rfs4_lo_state_t *lsp;
3686 stateid_t *id = (stateid_t *)stateid;
3687 nfsstat4 stat = NFS4_OK;
3688
3689 if (ct != NULL) {
3690 ct->cc_sysid = 0;
3691 ct->cc_pid = 0;
3692 ct->cc_caller_id = nfs4_srv_caller_id;
3693 ct->cc_flags = CC_DONTBLOCK;
3694 }
3695
3696 if (ISSPECIAL(stateid)) {
3697 fp = rfs4_findfile(vp, NULL, &create);
3698 if (fp == NULL)
3699 return (NFS4_OK);
3700 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
3701 rfs4_file_rele(fp);
3702 return (NFS4_OK);
3703 }
3704 if (mode == FWRITE ||
3705 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
3706 rfs4_recall_deleg(fp, trunc, NULL);
3707 rfs4_file_rele(fp);
3708 return (NFS4ERR_DELAY);
3709 }
3710 rfs4_file_rele(fp);
3711 return (NFS4_OK);
3712 } else {
3713 stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
3714 if (stat != NFS4_OK)
3715 return (stat);
3716 if (lsp != NULL) {
3717 /* Is associated server instance in its grace period? */
3718 if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
3719 rfs4_lo_state_rele(lsp, FALSE);
3720 if (sp != NULL)
3721 rfs4_state_rele_nounlock(sp);
3722 return (NFS4ERR_GRACE);
3723 }
3724 if (id->bits.type == LOCKID) {
3725 /* Seqid in the future? - that's bad */
3726 if (lsp->rls_lockid.bits.chgseq <
3727 id->bits.chgseq) {
3728 rfs4_lo_state_rele(lsp, FALSE);
3729 if (sp != NULL)
3730 rfs4_state_rele_nounlock(sp);
3731 return (NFS4ERR_BAD_STATEID);
3732 }
3733 /* Seqid in the past? - that's old */
3734 if (lsp->rls_lockid.bits.chgseq >
3735 id->bits.chgseq) {
3736 rfs4_lo_state_rele(lsp, FALSE);
3737 if (sp != NULL)
3738 rfs4_state_rele_nounlock(sp);
3739 return (NFS4ERR_OLD_STATEID);
3740 }
3741 /* Ensure specified filehandle matches */
3742 if (lsp->rls_state->rs_finfo->rf_vp != vp) {
3743 rfs4_lo_state_rele(lsp, FALSE);
3744 if (sp != NULL)
3745 rfs4_state_rele_nounlock(sp);
3746 return (NFS4ERR_BAD_STATEID);
3747 }
3748 }
3749 if (ct != NULL) {
3750 ct->cc_sysid =
3751 lsp->rls_locker->rl_client->rc_sysidt;
3752 ct->cc_pid = lsp->rls_locker->rl_pid;
3753 }
3754 rfs4_lo_state_rele(lsp, FALSE);
3755 }
3756
3757 /* Stateid provided was an "open" stateid */
3758 if (sp != NULL) {
3759 /* Is associated server instance in its grace period? */
3760 if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
3761 rfs4_state_rele_nounlock(sp);
3762 return (NFS4ERR_GRACE);
3763 }
3764 if (id->bits.type == OPENID) {
3765 /* Seqid in the future? - that's bad */
3766 if (sp->rs_stateid.bits.chgseq <
3767 id->bits.chgseq) {
3768 rfs4_state_rele_nounlock(sp);
3769 return (NFS4ERR_BAD_STATEID);
3770 }
3771 /* Seqid in the past - that's old */
3772 if (sp->rs_stateid.bits.chgseq >
3773 id->bits.chgseq) {
3774 rfs4_state_rele_nounlock(sp);
3775 return (NFS4ERR_OLD_STATEID);
3776 }
3777 }
3778 /* Ensure specified filehandle matches */
3779 if (sp->rs_finfo->rf_vp != vp) {
3780 rfs4_state_rele_nounlock(sp);
3781 return (NFS4ERR_BAD_STATEID);
3782 }
3783
3784 if (sp->rs_owner->ro_need_confirm) {
3785 rfs4_state_rele_nounlock(sp);
3786 return (NFS4ERR_BAD_STATEID);
3787 }
3788
3789 if (sp->rs_closed == TRUE) {
3790 rfs4_state_rele_nounlock(sp);
3791 return (NFS4ERR_OLD_STATEID);
3792 }
3793
3794 if (do_access)
3795 stat = rfs4_state_has_access(sp, mode, vp);
3796 else
3797 stat = NFS4_OK;
3798
3799 /*
3800 * Return whether this state has write
3801 * delegation if desired
3802 */
3803 if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
3804 OPEN_DELEGATE_WRITE))
3805 *deleg = TRUE;
3806
3807 /*
3808 * We got a valid stateid, so we update the
3809 * lease on the client. Ideally we would like
3810 * to do this after the calling op succeeds,
3811 * but for now this will be good
3812 * enough. Callers of this routine are
3813 * currently insulated from the state stuff.
3814 */
3815 rfs4_update_lease(sp->rs_owner->ro_client);
3816
3817 /*
3818 * If a delegation is present on this file and
3819 * this is a WRITE, then update the lastwrite
3820 * time to indicate that activity is present.
3821 */
3822 if (sp->rs_finfo->rf_dinfo.rd_dtype ==
3823 OPEN_DELEGATE_WRITE &&
3824 mode == FWRITE) {
3825 sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
3826 gethrestime_sec();
3827 }
3828
3829 rfs4_state_rele_nounlock(sp);
3830
3831 return (stat);
3832 }
3833
3834 if (dsp != NULL) {
3835 /* Is associated server instance in its grace period? */
3836 if (rfs4_clnt_in_grace(dsp->rds_client)) {
3837 rfs4_deleg_state_rele(dsp);
3838 return (NFS4ERR_GRACE);
3839 }
3840 if (dsp->rds_delegid.bits.chgseq != id->bits.chgseq) {
3841 rfs4_deleg_state_rele(dsp);
3842 return (NFS4ERR_BAD_STATEID);
3843 }
3844
3845 /* Ensure specified filehandle matches */
3846 if (dsp->rds_finfo->rf_vp != vp) {
3847 rfs4_deleg_state_rele(dsp);
3848 return (NFS4ERR_BAD_STATEID);
3849 }
3850 /*
3851 * Return whether this state has write
3852 * delegation if desired
3853 */
3854 if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3855 OPEN_DELEGATE_WRITE))
3856 *deleg = TRUE;
3857
3858 rfs4_update_lease(dsp->rds_client);
3859
3860 /*
3861 * If a delegation is present on this file and
3862 * this is a WRITE, then update the lastwrite
3863 * time to indicate that activity is present.
3864 */
3865 if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3866 OPEN_DELEGATE_WRITE && mode == FWRITE) {
3867 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
3868 gethrestime_sec();
3869 }
3870
3871 /*
3872 * XXX - what happens if this is a WRITE and the
3873 * delegation type of for READ.
3874 */
3875 rfs4_deleg_state_rele(dsp);
3876
3877 return (stat);
3878 }
3879 /*
3880 * If we got this far, something bad happened
3881 */
3882 return (NFS4ERR_BAD_STATEID);
3883 }
3884 }
3885
3886
3887 /*
3888 * This is a special function in that for the file struct provided the
3889 * server wants to remove/close all current state associated with the
3890 * file. The prime use of this would be with OP_REMOVE to force the
3891 * release of state and particularly of file locks.
3892 *
3893 * There is an assumption that there is no delegations outstanding on
3894 * this file at this point. The caller should have waited for those
3895 * to be returned or revoked.
3896 */
3897 void
3898 rfs4_close_all_state(rfs4_file_t *fp)
3899 {
3900 rfs4_state_t *sp;
3901
3902 rfs4_dbe_lock(fp->rf_dbe);
3903
3904 #ifdef DEBUG
3905 /* only applies when server is handing out delegations */
3906 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3907 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3908 #endif
3909
3910 /* No delegations for this file */
3911 ASSERT(list_is_empty(&fp->rf_delegstatelist));
3912
3913 /* Make sure that it can not be found */
3914 rfs4_dbe_invalidate(fp->rf_dbe);
3915
3916 if (fp->rf_vp == NULL) {
3917 rfs4_dbe_unlock(fp->rf_dbe);
3918 return;
3919 }
3920 rfs4_dbe_unlock(fp->rf_dbe);
3921
3922 /*
3923 * Hold as writer to prevent other server threads from
3924 * processing requests related to the file while all state is
3925 * being removed.
3926 */
3927 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
3928
3929 /* Remove ALL state from the file */
3930 while (sp = rfs4_findstate_by_file(fp)) {
3931 rfs4_state_close(sp, FALSE, FALSE, CRED());
3932 rfs4_state_rele_nounlock(sp);
3933 }
3934
3935 /*
3936 * This is only safe since there are no further references to
3937 * the file.
3938 */
3939 rfs4_dbe_lock(fp->rf_dbe);
3940 if (fp->rf_vp) {
3941 vnode_t *vp = fp->rf_vp;
3942
3943 mutex_enter(&vp->v_vsd_lock);
3944 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
3945 mutex_exit(&vp->v_vsd_lock);
3946 VN_RELE(vp);
3947 fp->rf_vp = NULL;
3948 }
3949 rfs4_dbe_unlock(fp->rf_dbe);
3950
3951 /* Finally let other references to proceed */
3952 rw_exit(&fp->rf_file_rwlock);
3953 }
3954
3955 /*
3956 * This function is used as a target for the rfs4_dbe_walk() call
3957 * below. The purpose of this function is to see if the
3958 * lockowner_state refers to a file that resides within the exportinfo
3959 * export. If so, then remove the lock_owner state (file locks and
3960 * share "locks") for this object since the intent is the server is
3961 * unexporting the specified directory. Be sure to invalidate the
3962 * object after the state has been released
3963 */
3964 static void
3965 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
3966 {
3967 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
3968 struct exportinfo *exi = (struct exportinfo *)e;
3969 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
3970 fhandle_t *efhp;
3971
3972 efhp = (fhandle_t *)&exi->exi_fh;
3973 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3974
3975 FH_TO_FMT4(efhp, exi_fhp);
3976
3977 finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
3978 rf_filehandle.nfs_fh4_val;
3979
3980 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
3981 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
3982 exi_fhp->fh4_xlen) == 0) {
3983 rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
3984 rfs4_dbe_invalidate(lsp->rls_dbe);
3985 rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
3986 }
3987 }
3988
3989 /*
3990 * This function is used as a target for the rfs4_dbe_walk() call
3991 * below. The purpose of this function is to see if the state refers
3992 * to a file that resides within the exportinfo export. If so, then
3993 * remove the open state for this object since the intent is the
3994 * server is unexporting the specified directory. The main result for
3995 * this type of entry is to invalidate it such it will not be found in
3996 * the future.
3997 */
3998 static void
3999 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
4000 {
4001 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
4002 struct exportinfo *exi = (struct exportinfo *)e;
4003 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4004 fhandle_t *efhp;
4005
4006 efhp = (fhandle_t *)&exi->exi_fh;
4007 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4008
4009 FH_TO_FMT4(efhp, exi_fhp);
4010
4011 finfo_fhp =
4012 (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
4013
4014 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4015 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4016 exi_fhp->fh4_xlen) == 0) {
4017 rfs4_state_close(sp, TRUE, FALSE, CRED());
4018 rfs4_dbe_invalidate(sp->rs_dbe);
4019 }
4020 }
4021
4022 /*
4023 * This function is used as a target for the rfs4_dbe_walk() call
4024 * below. The purpose of this function is to see if the state refers
4025 * to a file that resides within the exportinfo export. If so, then
4026 * remove the deleg state for this object since the intent is the
4027 * server is unexporting the specified directory. The main result for
4028 * this type of entry is to invalidate it such it will not be found in
4029 * the future.
4030 */
4031 static void
4032 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
4033 {
4034 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
4035 struct exportinfo *exi = (struct exportinfo *)e;
4036 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4037 fhandle_t *efhp;
4038
4039 efhp = (fhandle_t *)&exi->exi_fh;
4040 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4041
4042 FH_TO_FMT4(efhp, exi_fhp);
4043
4044 finfo_fhp =
4045 (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
4046
4047 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4048 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4049 exi_fhp->fh4_xlen) == 0) {
4050 rfs4_dbe_invalidate(dsp->rds_dbe);
4051 }
4052 }
4053
4054 /*
4055 * This function is used as a target for the rfs4_dbe_walk() call
4056 * below. The purpose of this function is to see if the state refers
4057 * to a file that resides within the exportinfo export. If so, then
4058 * release vnode hold for this object since the intent is the server
4059 * is unexporting the specified directory. Invalidation will prevent
4060 * this struct from being found in the future.
4061 */
4062 static void
4063 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
4064 {
4065 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
4066 struct exportinfo *exi = (struct exportinfo *)e;
4067 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4068 fhandle_t *efhp;
4069
4070 efhp = (fhandle_t *)&exi->exi_fh;
4071 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4072
4073 FH_TO_FMT4(efhp, exi_fhp);
4074
4075 finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
4076
4077 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4078 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4079 exi_fhp->fh4_xlen) == 0) {
4080 if (fp->rf_vp) {
4081 vnode_t *vp = fp->rf_vp;
4082
4083 /*
4084 * don't leak monitors and remove the reference
4085 * put on the vnode when the delegation was granted.
4086 */
4087 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
4088 (void) fem_uninstall(vp, deleg_rdops,
4089 (void *)fp);
4090 vn_open_downgrade(vp, FREAD);
4091 } else if (fp->rf_dinfo.rd_dtype ==
4092 OPEN_DELEGATE_WRITE) {
4093 (void) fem_uninstall(vp, deleg_wrops,
4094 (void *)fp);
4095 vn_open_downgrade(vp, FREAD|FWRITE);
4096 }
4097 mutex_enter(&vp->v_vsd_lock);
4098 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4099 mutex_exit(&vp->v_vsd_lock);
4100 VN_RELE(vp);
4101 fp->rf_vp = NULL;
4102 }
4103 rfs4_dbe_invalidate(fp->rf_dbe);
4104 }
4105 }
4106
4107 /*
4108 * Given a directory that is being unexported, cleanup/release all
4109 * state in the server that refers to objects residing underneath this
4110 * particular export. The ordering of the release is important.
4111 * Lock_owner, then state and then file.
4112 *
4113 * NFS zones note: nfs_export.c:unexport() calls this from a
4114 * thread in the global zone for NGZ data structures, so we
4115 * CANNOT use zone_getspecific anywhere in this code path.
4116 */
4117 void
4118 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4119 {
4120 nfs_globals_t *ng;
4121 nfs4_srv_t *nsrv4;
4122
4123 ng = ne->ne_globals;
4124 ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
4125 nsrv4 = ng->nfs4_srv;
4126
4127 mutex_enter(&nsrv4->state_lock);
4128
4129 if (nsrv4->nfs4_server_state == NULL) {
4130 mutex_exit(&nsrv4->state_lock);
4131 return;
4132 }
4133
4134 rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
4135 rfs4_lo_state_walk_callout, exi);
4136 rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4137 rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
4138 rfs4_deleg_state_walk_callout, exi);
4139 rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4140
4141 mutex_exit(&nsrv4->state_lock);
4142 }