1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2018 Nexenta Systems, Inc.
28 */
29
30 #include <sys/systm.h>
31 #include <sys/kmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/atomic.h>
34 #include <sys/clconf.h>
35 #include <sys/cladm.h>
36 #include <sys/flock.h>
37 #include <nfs/export.h>
38 #include <nfs/nfs.h>
39 #include <nfs/nfs4.h>
40 #include <nfs/nfssys.h>
41 #include <nfs/lm.h>
42 #include <sys/pathname.h>
43 #include <sys/sdt.h>
44 #include <sys/nvpair.h>
45
46 extern u_longlong_t nfs4_srv_caller_id;
47
48 extern uint_t nfs4_srv_vkey;
49
50 stateid4 special0 = {
51 0,
52 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
53 };
54
55 stateid4 special1 = {
56 0xffffffff,
57 {
58 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
59 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
60 (char)0xff, (char)0xff, (char)0xff, (char)0xff
61 }
62 };
63
64
/*
 * Evaluates to nonzero when stateid4_cmp() returns nonzero for "id"
 * against either of the reserved stateids special0 or special1.
 *
 * The argument is parenthesized so that any pointer-valued expression may
 * be passed; note that it can still be evaluated twice.
 */
#define	ISSPECIAL(id)	(stateid4_cmp((id), &special0) || \
			stateid4_cmp((id), &special1))
67
68 /* For embedding the cluster nodeid into our clientid */
69 #define CLUSTER_NODEID_SHIFT 24
70 #define CLUSTER_MAX_NODEID 255
71
72 #ifdef DEBUG
73 int rfs4_debug;
74 #endif
75
76 static uint32_t rfs4_database_debug = 0x00;
77
78 /* CSTYLED */
79 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
80 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
81 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
82 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
83
84 /*
85 * Couple of simple init/destroy functions for a general waiter
86 */
87 void
88 rfs4_sw_init(rfs4_state_wait_t *swp)
89 {
90 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
91 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
92 swp->sw_active = FALSE;
93 swp->sw_wait_count = 0;
94 }
95
96 void
97 rfs4_sw_destroy(rfs4_state_wait_t *swp)
98 {
99 mutex_destroy(swp->sw_cv_lock);
100 cv_destroy(swp->sw_cv);
101 }
102
103 void
104 rfs4_sw_enter(rfs4_state_wait_t *swp)
105 {
106 mutex_enter(swp->sw_cv_lock);
107 while (swp->sw_active) {
108 swp->sw_wait_count++;
109 cv_wait(swp->sw_cv, swp->sw_cv_lock);
110 swp->sw_wait_count--;
111 }
112 ASSERT(swp->sw_active == FALSE);
113 swp->sw_active = TRUE;
114 mutex_exit(swp->sw_cv_lock);
115 }
116
117 void
118 rfs4_sw_exit(rfs4_state_wait_t *swp)
119 {
120 mutex_enter(swp->sw_cv_lock);
121 ASSERT(swp->sw_active == TRUE);
122 swp->sw_active = FALSE;
123 if (swp->sw_wait_count != 0)
124 cv_broadcast(swp->sw_cv);
125 mutex_exit(swp->sw_cv_lock);
126 }
127
128 static void
129 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
130 {
131 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
132 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
133
134 if (sres->status == NFS4ERR_DENIED) {
135 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
136 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
137 }
138 }
139
140 /*
141 * CPR callback id -- not related to v4 callbacks
142 */
143 static callb_id_t cpr_id = 0;
144
145 static void
146 deep_lock_free(LOCK4res *res)
147 {
148 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
149
150 if (res->status == NFS4ERR_DENIED)
151 kmem_free(lo->owner_val, lo->owner_len);
152 }
153
154 static void
155 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
156 {
157 nfsace4 *sacep, *dacep;
158
159 if (sres->status != NFS4_OK) {
160 return;
161 }
162
163 dres->attrset = sres->attrset;
164
165 switch (sres->delegation.delegation_type) {
166 case OPEN_DELEGATE_NONE:
167 return;
168 case OPEN_DELEGATE_READ:
169 sacep = &sres->delegation.open_delegation4_u.read.permissions;
170 dacep = &dres->delegation.open_delegation4_u.read.permissions;
171 break;
172 case OPEN_DELEGATE_WRITE:
173 sacep = &sres->delegation.open_delegation4_u.write.permissions;
174 dacep = &dres->delegation.open_delegation4_u.write.permissions;
175 break;
176 }
177 dacep->who.utf8string_val =
178 kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
179 bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
180 sacep->who.utf8string_len);
181 }
182
183 static void
184 deep_open_free(OPEN4res *res)
185 {
186 nfsace4 *acep;
187 if (res->status != NFS4_OK)
188 return;
189
190 switch (res->delegation.delegation_type) {
191 case OPEN_DELEGATE_NONE:
192 return;
193 case OPEN_DELEGATE_READ:
194 acep = &res->delegation.open_delegation4_u.read.permissions;
195 break;
196 case OPEN_DELEGATE_WRITE:
197 acep = &res->delegation.open_delegation4_u.write.permissions;
198 break;
199 }
200
201 if (acep->who.utf8string_val) {
202 kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
203 acep->who.utf8string_val = NULL;
204 }
205 }
206
207 void
208 rfs4_free_reply(nfs_resop4 *rp)
209 {
210 switch (rp->resop) {
211 case OP_LOCK:
212 deep_lock_free(&rp->nfs_resop4_u.oplock);
213 break;
214 case OP_OPEN:
215 deep_open_free(&rp->nfs_resop4_u.opopen);
216 default:
217 break;
218 }
219 }
220
221 void
222 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
223 {
224 *dst = *src;
225
226 /* Handle responses that need deep copy */
227 switch (src->resop) {
228 case OP_LOCK:
229 deep_lock_copy(&dst->nfs_resop4_u.oplock,
230 &src->nfs_resop4_u.oplock);
231 break;
232 case OP_OPEN:
233 deep_open_copy(&dst->nfs_resop4_u.opopen,
234 &src->nfs_resop4_u.opopen);
235 break;
236 default:
237 break;
238 };
239 }
240
241 /*
242 * This is the implementation of the underlying state engine. The
243 * public interface to this engine is described by
244 * nfs4_state.h. Callers to the engine should hold no state engine
245 * locks when they call in to it. If the protocol needs to lock data
246 * structures it should do so after acquiring all references to them
247 * first and then follow the following lock order:
248 *
249 * client > openowner > state > lo_state > lockowner > file.
250 *
251 * Internally we only allow a thread to hold one hash bucket lock at a
252 * time and the lock is higher in the lock order (must be acquired
253 * first) than the data structure that is on that hash list.
254 *
255 * If a new reference was acquired by the caller, that reference needs
256 * to be released after releasing all acquired locks with the
257 * corresponding rfs4_*_rele routine.
258 */
259
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
 * correctly (I hope) and will not change by much. The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader-writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
271
/*
 * Table sizing and hashing helpers.
 *
 * TABSIZE  - table size constant; DEBUG builds use a much smaller value
 *            than non-DEBUG builds.
 * ADDRHASH - converts a key to unsigned long and shifts it right by 3.
 * MAXTABSZ - 1024 * 1024, parenthesized so that it behaves as a single
 *            value inside larger expressions (e.g. "x / MAXTABSZ").
 */
#ifdef DEBUG
#define	TABSIZE		17
#else
#define	TABSIZE		2047
#endif

#define	ADDRHASH(key)	((unsigned long)(key) >> 3)

#define	MAXTABSZ	(1024 * 1024)
281
282 /* The values below are rfs4_lease_time units */
283
284 #ifdef DEBUG
285 #define CLIENT_CACHE_TIME 1
286 #define OPENOWNER_CACHE_TIME 1
287 #define STATE_CACHE_TIME 1
288 #define LO_STATE_CACHE_TIME 1
289 #define LOCKOWNER_CACHE_TIME 1
290 #define FILE_CACHE_TIME 3
291 #define DELEG_STATE_CACHE_TIME 1
292 #else
293 #define CLIENT_CACHE_TIME 10
294 #define OPENOWNER_CACHE_TIME 5
295 #define STATE_CACHE_TIME 1
296 #define LO_STATE_CACHE_TIME 1
297 #define LOCKOWNER_CACHE_TIME 3
298 #define FILE_CACHE_TIME 40
299 #define DELEG_STATE_CACHE_TIME 1
300 #endif
301
/*
 * NFSv4 server state databases
 *
 * Initialized when the module is loaded and used by NFSv4 state tables.
 * These kmem_cache databases are global; the tables that make use of them
 * are per zone.
 */
309 kmem_cache_t *rfs4_client_mem_cache;
310 kmem_cache_t *rfs4_clntIP_mem_cache;
311 kmem_cache_t *rfs4_openown_mem_cache;
312 kmem_cache_t *rfs4_openstID_mem_cache;
313 kmem_cache_t *rfs4_lockstID_mem_cache;
314 kmem_cache_t *rfs4_lockown_mem_cache;
315 kmem_cache_t *rfs4_file_mem_cache;
316 kmem_cache_t *rfs4_delegstID_mem_cache;
317
318 /*
319 * NFSv4 state table functions
320 */
321 static bool_t rfs4_client_create(rfs4_entry_t, void *);
322 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
323 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
324 static void rfs4_client_destroy(rfs4_entry_t);
325 static bool_t rfs4_client_expiry(rfs4_entry_t);
326 static uint32_t clientid_hash(void *);
327 static bool_t clientid_compare(rfs4_entry_t, void *);
328 static void *clientid_mkkey(rfs4_entry_t);
329 static uint32_t nfsclnt_hash(void *);
330 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
331 static void *nfsclnt_mkkey(rfs4_entry_t);
332 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
333 static void rfs4_clntip_destroy(rfs4_entry_t);
334 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
335 static uint32_t clntip_hash(void *);
336 static bool_t clntip_compare(rfs4_entry_t, void *);
337 static void *clntip_mkkey(rfs4_entry_t);
338 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
339 static void rfs4_openowner_destroy(rfs4_entry_t);
340 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
341 static uint32_t openowner_hash(void *);
342 static bool_t openowner_compare(rfs4_entry_t, void *);
343 static void *openowner_mkkey(rfs4_entry_t);
344 static bool_t rfs4_state_create(rfs4_entry_t, void *);
345 static void rfs4_state_destroy(rfs4_entry_t);
346 static bool_t rfs4_state_expiry(rfs4_entry_t);
347 static uint32_t state_hash(void *);
348 static bool_t state_compare(rfs4_entry_t, void *);
349 static void *state_mkkey(rfs4_entry_t);
350 static uint32_t state_owner_file_hash(void *);
351 static bool_t state_owner_file_compare(rfs4_entry_t, void *);
352 static void *state_owner_file_mkkey(rfs4_entry_t);
353 static uint32_t state_file_hash(void *);
354 static bool_t state_file_compare(rfs4_entry_t, void *);
355 static void *state_file_mkkey(rfs4_entry_t);
356 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
357 static void rfs4_lo_state_destroy(rfs4_entry_t);
358 static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
359 static uint32_t lo_state_hash(void *);
360 static bool_t lo_state_compare(rfs4_entry_t, void *);
361 static void *lo_state_mkkey(rfs4_entry_t);
362 static uint32_t lo_state_lo_hash(void *);
363 static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
364 static void *lo_state_lo_mkkey(rfs4_entry_t);
365 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
366 static void rfs4_lockowner_destroy(rfs4_entry_t);
367 static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
368 static uint32_t lockowner_hash(void *);
369 static bool_t lockowner_compare(rfs4_entry_t, void *);
370 static void *lockowner_mkkey(rfs4_entry_t);
371 static uint32_t pid_hash(void *);
372 static bool_t pid_compare(rfs4_entry_t, void *);
373 static void *pid_mkkey(rfs4_entry_t);
374 static bool_t rfs4_file_create(rfs4_entry_t, void *);
375 static void rfs4_file_destroy(rfs4_entry_t);
376 static uint32_t file_hash(void *);
377 static bool_t file_compare(rfs4_entry_t, void *);
378 static void *file_mkkey(rfs4_entry_t);
379 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
380 static void rfs4_deleg_state_destroy(rfs4_entry_t);
381 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
382 static uint32_t deleg_hash(void *);
383 static bool_t deleg_compare(rfs4_entry_t, void *);
384 static void *deleg_mkkey(rfs4_entry_t);
385 static uint32_t deleg_state_hash(void *);
386 static bool_t deleg_state_compare(rfs4_entry_t, void *);
387 static void *deleg_state_mkkey(rfs4_entry_t);
388
389 static void rfs4_state_rele_nounlock(rfs4_state_t *);
390
391 static int rfs4_ss_enabled = 0;
392
393 extern void (*rfs4_client_clrst)(struct nfs4clrst_args *);
394
395 void
396 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
397 {
398 kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
399 }
400
401 static rfs4_ss_pn_t *
402 rfs4_ss_pnalloc(char *dir, char *leaf)
403 {
404 rfs4_ss_pn_t *ss_pn;
405 int dir_len, leaf_len;
406
407 /*
408 * validate we have a resonable path
409 * (account for the '/' and trailing null)
410 */
411 if ((dir_len = strlen(dir)) > MAXPATHLEN ||
412 (leaf_len = strlen(leaf)) > MAXNAMELEN ||
413 (dir_len + leaf_len + 2) > MAXPATHLEN) {
414 return (NULL);
415 }
416
417 ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
418
419 (void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
420 /* Handy pointer to just the leaf name */
421 ss_pn->leaf = ss_pn->pn + dir_len + 1;
422 return (ss_pn);
423 }
424
425
426 /*
427 * Move the "leaf" filename from "sdir" directory
428 * to the "ddir" directory. Return the pathname of
429 * the destination unless the rename fails in which
430 * case we need to return the source pathname.
431 */
432 static rfs4_ss_pn_t *
433 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
434 {
435 rfs4_ss_pn_t *src, *dst;
436
437 if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
438 return (NULL);
439
440 if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
441 rfs4_ss_pnfree(src);
442 return (NULL);
443 }
444
445 /*
446 * If the rename fails we shall return the src
447 * pathname and free the dst. Otherwise we need
448 * to free the src and return the dst pathanme.
449 */
450 if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
451 rfs4_ss_pnfree(dst);
452 return (src);
453 }
454 rfs4_ss_pnfree(src);
455 return (dst);
456 }
457
458
459 static rfs4_oldstate_t *
460 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
461 {
462 struct uio uio;
463 struct iovec iov[3];
464
465 rfs4_oldstate_t *cl_ss = NULL;
466 vnode_t *vp;
467 vattr_t va;
468 uint_t id_len;
469 int err, kill_file, file_vers;
470
471 if (ss_pn == NULL)
472 return (NULL);
473
474 /*
475 * open the state file.
476 */
477 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
478 return (NULL);
479 }
480
481 if (vp->v_type != VREG) {
482 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
483 VN_RELE(vp);
484 return (NULL);
485 }
486
487 err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
488 if (err) {
489 /*
490 * We don't have read access? better get the heck out.
491 */
492 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
493 VN_RELE(vp);
494 return (NULL);
495 }
496
497 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
498 /*
499 * get the file size to do some basic validation
500 */
501 va.va_mask = AT_SIZE;
502 err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
503
504 kill_file = (va.va_size == 0 || va.va_size <
505 (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
506
507 if (err || kill_file) {
508 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
509 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
510 VN_RELE(vp);
511 if (kill_file) {
512 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
513 }
514 return (NULL);
515 }
516
517 cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
518
519 /*
520 * build iovecs to read in the file_version, verifier and id_len
521 */
522 iov[0].iov_base = (caddr_t)&file_vers;
523 iov[0].iov_len = sizeof (int);
524 iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
525 iov[1].iov_len = NFS4_VERIFIER_SIZE;
526 iov[2].iov_base = (caddr_t)&id_len;
527 iov[2].iov_len = sizeof (uint_t);
528
529 uio.uio_iov = iov;
530 uio.uio_iovcnt = 3;
531 uio.uio_segflg = UIO_SYSSPACE;
532 uio.uio_loffset = 0;
533 uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
534
535 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
536 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
537 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
538 VN_RELE(vp);
539 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
540 return (NULL);
541 }
542
543 /*
544 * if the file_version doesn't match or if the
545 * id_len is zero or the combination of the verifier,
546 * id_len and id_val is bigger than the file we have
547 * a problem. If so ditch the file.
548 */
549 kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
550 (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
551
552 if (err || kill_file) {
553 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
554 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
555 VN_RELE(vp);
556 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
557 if (kill_file) {
558 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
559 }
560 return (NULL);
561 }
562
563 /*
564 * now get the client id value
565 */
566 cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
567 iov[0].iov_base = cl_ss->cl_id4.id_val;
568 iov[0].iov_len = id_len;
569
570 uio.uio_iov = iov;
571 uio.uio_iovcnt = 1;
572 uio.uio_segflg = UIO_SYSSPACE;
573 uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
574
575 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
576 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
577 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
578 VN_RELE(vp);
579 kmem_free(cl_ss->cl_id4.id_val, id_len);
580 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
581 return (NULL);
582 }
583
584 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
585 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
586 VN_RELE(vp);
587 return (cl_ss);
588 }
589
/*
 * Step from one directory entry to the entry that follows it in a
 * buffer, using the current entry's d_reclen as the stride.  Any
 * earlier definition of nextdp is discarded first.
 */
#undef	nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
594
595 /*
596 * Add entries from statedir to supplied oldstate list.
597 * Optionally, move all entries from statedir -> destdir.
598 */
599 void
600 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
601 {
602 rfs4_ss_pn_t *ss_pn;
603 rfs4_oldstate_t *cl_ss = NULL;
604 char *dirt = NULL;
605 int err, dir_eof = 0, size = 0;
606 vnode_t *dvp;
607 struct iovec iov;
608 struct uio uio;
609 struct dirent64 *dep;
610 offset_t dirchunk_offset = 0;
611
612 /*
613 * open the state directory
614 */
615 if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
616 return;
617
618 if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
619 goto out;
620
621 dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
622
623 /*
624 * Get and process the directory entries
625 */
626 while (!dir_eof) {
627 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
628 iov.iov_base = dirt;
629 iov.iov_len = RFS4_SS_DIRSIZE;
630 uio.uio_iov = &iov;
631 uio.uio_iovcnt = 1;
632 uio.uio_segflg = UIO_SYSSPACE;
633 uio.uio_loffset = dirchunk_offset;
634 uio.uio_resid = RFS4_SS_DIRSIZE;
635
636 err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
637 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
638 if (err)
639 goto out;
640
641 size = RFS4_SS_DIRSIZE - uio.uio_resid;
642
643 /*
644 * Process all the directory entries in this
645 * readdir chunk
646 */
647 for (dep = (struct dirent64 *)dirt; size > 0;
648 dep = nextdp(dep)) {
649
650 size -= dep->d_reclen;
651 dirchunk_offset = dep->d_off;
652
653 /*
654 * Skip '.' and '..'
655 */
656 if (NFS_IS_DOTNAME(dep->d_name))
657 continue;
658
659 ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
660 if (ss_pn == NULL)
661 continue;
662
663 if (cl_ss = rfs4_ss_getstate(dvp, ss_pn)) {
664 if (destdir != NULL) {
665 rfs4_ss_pnfree(ss_pn);
666 cl_ss->ss_pn = rfs4_ss_movestate(
667 statedir, destdir, dep->d_name);
668 } else {
669 cl_ss->ss_pn = ss_pn;
670 }
671 insque(cl_ss, oldstate);
672 } else {
673 rfs4_ss_pnfree(ss_pn);
674 }
675 }
676 }
677
678 out:
679 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
680 VN_RELE(dvp);
681 if (dirt)
682 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
683 }
684
685 static void
686 rfs4_ss_init(nfs4_srv_t *nsrv4)
687 {
688 int npaths = 1;
689 char *default_dss_path = NFS4_DSS_VAR_DIR;
690
691 /* read the default stable storage state */
692 rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
693
694 rfs4_ss_enabled = 1;
695 }
696
697 static void
698 rfs4_ss_fini(nfs4_srv_t *nsrv4)
699 {
700 rfs4_servinst_t *sip;
701
702 mutex_enter(&nsrv4->servinst_lock);
703 sip = nsrv4->nfs4_cur_servinst;
704 while (sip != NULL) {
705 rfs4_dss_clear_oldstate(sip);
706 sip = sip->next;
707 }
708 mutex_exit(&nsrv4->servinst_lock);
709 }
710
711 /*
712 * Remove all oldstate files referenced by this servinst.
713 */
714 static void
715 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
716 {
717 rfs4_oldstate_t *os_head, *osp;
718
719 rw_enter(&sip->oldstate_lock, RW_WRITER);
720 os_head = sip->oldstate;
721
722 if (os_head == NULL) {
723 rw_exit(&sip->oldstate_lock);
724 return;
725 }
726
727 /* skip dummy entry */
728 osp = os_head->next;
729 while (osp != os_head) {
730 char *leaf = osp->ss_pn->leaf;
731 rfs4_oldstate_t *os_next;
732
733 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
734
735 if (osp->cl_id4.id_val)
736 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
737 rfs4_ss_pnfree(osp->ss_pn);
738
739 os_next = osp->next;
740 remque(osp);
741 kmem_free(osp, sizeof (rfs4_oldstate_t));
742 osp = os_next;
743 }
744
745 rw_exit(&sip->oldstate_lock);
746 }
747
748 /*
749 * Form the state and oldstate paths, and read in the stable storage files.
750 */
751 void
752 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
753 {
754 int i;
755 char *state, *oldstate;
756
757 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
758 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
759
760 for (i = 0; i < npaths; i++) {
761 char *path = paths[i];
762
763 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
764 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
765
766 /*
767 * Populate the current server instance's oldstate list.
768 *
769 * 1. Read stable storage data from old state directory,
770 * leaving its contents alone.
771 *
772 * 2. Read stable storage data from state directory,
773 * and move the latter's contents to old state
774 * directory.
775 */
776 /* CSTYLED */
777 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
778 /* CSTYLED */
779 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
780 }
781
782 kmem_free(state, MAXPATHLEN);
783 kmem_free(oldstate, MAXPATHLEN);
784 }
785
786
787 /*
788 * Check if we are still in grace and if the client can be
789 * granted permission to perform reclaims.
790 */
791 void
792 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
793 {
794 rfs4_servinst_t *sip;
795
796 /*
797 * It should be sufficient to check the oldstate data for just
798 * this client's instance. However, since our per-instance
799 * client grouping is solely temporal, HA-NFSv4 RG failover
800 * might result in clients of the same RG being partitioned into
801 * separate instances.
802 *
803 * Until the client grouping is improved, we must check the
804 * oldstate data for all instances with an active grace period.
805 *
806 * This also serves as the mechanism to remove stale oldstate data.
807 * The first time we check an instance after its grace period has
808 * expired, the oldstate data should be cleared.
809 *
810 * Start at the current instance, and walk the list backwards
811 * to the first.
812 */
813 mutex_enter(&nsrv4->servinst_lock);
814 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
815 rfs4_ss_chkclid_sip(cp, sip);
816
817 /* if the above check found this client, we're done */
818 if (cp->rc_can_reclaim)
819 break;
820 }
821 mutex_exit(&nsrv4->servinst_lock);
822 }
823
824 static void
825 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
826 {
827 rfs4_oldstate_t *osp, *os_head;
828
829 /* short circuit everything if this server instance has no oldstate */
830 rw_enter(&sip->oldstate_lock, RW_READER);
831 os_head = sip->oldstate;
832 rw_exit(&sip->oldstate_lock);
833 if (os_head == NULL)
834 return;
835
836 /*
837 * If this server instance is no longer in a grace period then
838 * the client won't be able to reclaim. No further need for this
839 * instance's oldstate data, so it can be cleared.
840 */
841 if (!rfs4_servinst_in_grace(sip))
842 return;
843
844 /* this instance is still in grace; search for the clientid */
845
846 rw_enter(&sip->oldstate_lock, RW_READER);
847
848 os_head = sip->oldstate;
849 /* skip dummy entry */
850 osp = os_head->next;
851 while (osp != os_head) {
852 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
853 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
854 osp->cl_id4.id_len) == 0) {
855 cp->rc_can_reclaim = 1;
856 break;
857 }
858 }
859 osp = osp->next;
860 }
861
862 rw_exit(&sip->oldstate_lock);
863 }
864
865 /*
866 * Place client information into stable storage: 1/3.
867 * First, generate the leaf filename, from the client's IP address and
868 * the server-generated short-hand clientid.
869 */
870 void
871 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
872 {
873 const char *kinet_ntop6(uchar_t *, char *, size_t);
874 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
875 struct sockaddr *ca;
876 uchar_t *b;
877
878 if (rfs4_ss_enabled == 0) {
879 return;
880 }
881
882 buf[0] = 0;
883
884 ca = (struct sockaddr *)&cp->rc_addr;
885
886 /*
887 * Convert the caller's IP address to a dotted string
888 */
889 if (ca->sa_family == AF_INET) {
890 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
891 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
892 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
893 } else if (ca->sa_family == AF_INET6) {
894 struct sockaddr_in6 *sin6;
895
896 sin6 = (struct sockaddr_in6 *)ca;
897 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
898 buf, INET6_ADDRSTRLEN);
899 }
900
901 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
902 (longlong_t)cp->rc_clientid);
903 rfs4_ss_clid_write(nsrv4, cp, leaf);
904 }
905
906 /*
907 * Place client information into stable storage: 2/3.
908 * DSS: distributed stable storage: the file may need to be written to
909 * multiple directories.
910 */
911 static void
912 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
913 {
914 rfs4_servinst_t *sip;
915
916 /*
917 * It should be sufficient to write the leaf file to (all) DSS paths
918 * associated with just this client's instance. However, since our
919 * per-instance client grouping is solely temporal, HA-NFSv4 RG
920 * failover might result in us losing DSS data.
921 *
922 * Until the client grouping is improved, we must write the DSS data
923 * to all instances' paths. Start at the current instance, and
924 * walk the list backwards to the first.
925 */
926 mutex_enter(&nsrv4->servinst_lock);
927 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
928 int i, npaths = sip->dss_npaths;
929
930 /* write the leaf file to all DSS paths */
931 for (i = 0; i < npaths; i++) {
932 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
933
934 /* HA-NFSv4 path might have been failed-away from us */
935 if (dss_path == NULL)
936 continue;
937
938 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
939 }
940 }
941 mutex_exit(&nsrv4->servinst_lock);
942 }
943
944 /*
945 * Place client information into stable storage: 3/3.
946 * Write the stable storage data to the requested file.
947 */
948 static void
949 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
950 {
951 int ioflag;
952 int file_vers = NFS4_SS_VERSION;
953 size_t dirlen;
954 struct uio uio;
955 struct iovec iov[4];
956 char *dir;
957 rfs4_ss_pn_t *ss_pn;
958 vnode_t *vp;
959 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
960
961 /* allow 2 extra bytes for '/' & NUL */
962 dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
963 dir = kmem_alloc(dirlen, KM_SLEEP);
964 (void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
965
966 ss_pn = rfs4_ss_pnalloc(dir, leaf);
967 /* rfs4_ss_pnalloc takes its own copy */
968 kmem_free(dir, dirlen);
969 if (ss_pn == NULL)
970 return;
971
972 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
973 CRCREAT, 0)) {
974 rfs4_ss_pnfree(ss_pn);
975 return;
976 }
977
978 /*
979 * We need to record leaf - i.e. the filename - so that we know
980 * what to remove, in the future. However, the dir part of cp->ss_pn
981 * should never be referenced directly, since it's potentially only
982 * one of several paths with this leaf in it.
983 */
984 if (cp->rc_ss_pn != NULL) {
985 if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
986 /* we've already recorded *this* leaf */
987 rfs4_ss_pnfree(ss_pn);
988 } else {
989 /* replace with this leaf */
990 rfs4_ss_pnfree(cp->rc_ss_pn);
991 cp->rc_ss_pn = ss_pn;
992 }
993 } else {
994 cp->rc_ss_pn = ss_pn;
995 }
996
997 /*
998 * Build a scatter list that points to the nfs_client_id4
999 */
1000 iov[0].iov_base = (caddr_t)&file_vers;
1001 iov[0].iov_len = sizeof (int);
1002 iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1003 iov[1].iov_len = NFS4_VERIFIER_SIZE;
1004 iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1005 iov[2].iov_len = sizeof (uint_t);
1006 iov[3].iov_base = (caddr_t)cl_id4->id_val;
1007 iov[3].iov_len = cl_id4->id_len;
1008
1009 uio.uio_iov = iov;
1010 uio.uio_iovcnt = 4;
1011 uio.uio_loffset = 0;
1012 uio.uio_segflg = UIO_SYSSPACE;
1013 uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1014 uio.uio_resid = cl_id4->id_len + sizeof (int) +
1015 NFS4_VERIFIER_SIZE + sizeof (uint_t);
1016
1017 ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1018 uio.uio_extflg = UIO_COPY_DEFAULT;
1019
1020 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1021 /* write the full client id to the file. */
1022 (void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1023 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1024
1025 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1026 VN_RELE(vp);
1027 }
1028
1029 /*
1030 * DSS: distributed stable storage.
1031 * Unpack the list of paths passed by nfsd.
1032 * Use nvlist_alloc(9F) to manage the data.
1033 * The caller is responsible for allocating and freeing the buffer.
1034 */
1035 int
1036 rfs4_dss_setpaths(char *buf, size_t buflen)
1037 {
1038 int error;
1039
1040 /*
1041 * If this is a "warm start", i.e. we previously had DSS paths,
1042 * preserve the old paths.
1043 */
1044 if (rfs4_dss_paths != NULL) {
1045 /*
1046 * Before we lose the ptr, destroy the nvlist and pathnames
1047 * array from the warm start before this one.
1048 */
1049 nvlist_free(rfs4_dss_oldpaths);
1050 rfs4_dss_oldpaths = rfs4_dss_paths;
1051 }
1052
1053 /* unpack the buffer into a searchable nvlist */
1054 error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1055 if (error)
1056 return (error);
1057
1058 /*
1059 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1060 * in the list, and record its location.
1061 */
1062 error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1063 &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1064 return (error);
1065 }
1066
1067 /*
1068 * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1069 * to find and mark the client for forced expire.
1070 */
1071 static void
1072 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1073 {
1074 rfs4_client_t *cp = (rfs4_client_t *)ent;
1075 struct nfs4clrst_args *clr = arg;
1076 struct sockaddr_in6 *ent_sin6;
1077 struct in6_addr clr_in6;
1078 struct sockaddr_in *ent_sin;
1079 struct in_addr clr_in;
1080
1081 if (clr->addr_type != cp->rc_addr.ss_family) {
1082 return;
1083 }
1084
1085 switch (clr->addr_type) {
1086
1087 case AF_INET6:
1088 /* copyin the address from user space */
1089 if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1090 break;
1091 }
1092
1093 ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1094
1095 /*
1096 * now compare, and if equivalent mark entry
1097 * for forced expiration
1098 */
1099 if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1100 cp->rc_forced_expire = 1;
1101 }
1102 break;
1103
1104 case AF_INET:
1105 /* copyin the address from user space */
1106 if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1107 break;
1108 }
1109
1110 ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1111
1112 /*
1113 * now compare, and if equivalent mark entry
1114 * for forced expiration
1115 */
1116 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1117 cp->rc_forced_expire = 1;
1118 }
1119 break;
1120
1121 default:
1122 /* force this assert to fail */
1123 ASSERT(clr->addr_type != clr->addr_type);
1124 }
1125 }
1126
1127 /*
1128 * This is called from nfssys() in order to clear server state
1129 * for the specified client IP Address.
1130 */
1131 void
1132 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1133 {
1134 nfs4_srv_t *nsrv4;
1135 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1136 (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1137 }
1138
/*
 * Global (not per-zone) initialization of the NFSv4 server state
 * machinery: registers the CPR callback, sets up the kmem caches that
 * back the state tables, and publishes rfs4_clear_client_state() through
 * the rfs4_client_clrst hook.  The per-zone tables themselves are built
 * by rfs4_state_zone_init().
 */
void
rfs4_state_g_init()
{
	extern boolean_t rfs4_cpr_callb(void *, int);
	/*
	 * Add a CPR callback so that we can update client
	 * access times to extend the lease after a suspend
	 * and resume (using the same class as rpcmod/connmgr)
	 */
	cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");

	/*
	 * NFSv4 server state databases
	 *
	 * Initialized when the module is loaded and used by NFSv4 state
	 * tables.  These kmem_cache free pools are used globally, the NFSv4
	 * state tables which make use of these kmem_cache free pools are per
	 * zone.
	 *
	 * initialize the global kmem_cache free pools which will be used by
	 * the NFSv4 state tables.
	 *
	 * NOTE(review): the last argument of each call runs 0..7 and looks
	 * like the slot in rfs4_db_mem_cache_table that rfs4_state_g_fini()
	 * later walks; the second argument matches the index count given to
	 * the corresponding rfs4_table_create() call -- confirm against
	 * nfs4_init_mem_cache().
	 */
	/* CSTYLED */
	rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
	/* CSTYLED */
	rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
	/* CSTYLED */
	rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
	/* CSTYLED */
	rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
	/* CSTYLED */
	rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
	/* CSTYLED */
	rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
	/* CSTYLED */
	rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
	/* CSTYLED */
	rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);

	/* let nfssys() reach the clear-state implementation */
	rfs4_client_clrst = rfs4_clear_client_state;
}
1183
1184
1185 /*
1186 * Used at server shutdown to cleanup all of the NFSv4 server's structures
1187 * and other state.
1188 */
1189 void
1190 rfs4_state_g_fini()
1191 {
1192 int i;
1193 /*
1194 * Cleanup the CPR callback.
1195 */
1196 if (cpr_id)
1197 (void) callb_delete(cpr_id);
1198
1199 rfs4_client_clrst = NULL;
1200
1201 /* free the NFSv4 state databases */
1202 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1203 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1204 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1205 }
1206
1207 rfs4_client_mem_cache = NULL;
1208 rfs4_clntIP_mem_cache = NULL;
1209 rfs4_openown_mem_cache = NULL;
1210 rfs4_openstID_mem_cache = NULL;
1211 rfs4_lockstID_mem_cache = NULL;
1212 rfs4_lockown_mem_cache = NULL;
1213 rfs4_file_mem_cache = NULL;
1214 rfs4_delegstID_mem_cache = NULL;
1215
1216 /* DSS: distributed stable storage */
1217 nvlist_free(rfs4_dss_oldpaths);
1218 nvlist_free(rfs4_dss_paths);
1219 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1220 }
1221
/*
 * Used to initialize the per zone NFSv4 server's state
 *
 * In order, this routine:
 *  - clears the DSS served-paths list and advances rfs4_start_time;
 *  - creates a server instance (grace period not started);
 *  - under state_lock, returns early if nfs4_server_state already
 *    exists, otherwise applies default cache times, creates the state
 *    database and every table and index in it;
 *  - finally initializes stable storage via rfs4_ss_init().
 *
 * Note that the early return for an already-initialized database happens
 * after the server instance has been created and skips rfs4_ss_init().
 */
void
rfs4_state_zone_init(nfs4_srv_t *nsrv4)
{
	time_t start_time;
	int start_grace;
	char *dss_path = NFS4_DSS_VAR_DIR;

	/* DSS: distributed stable storage: initialise served paths list */
	nsrv4->dss_pathlist = NULL;

	/*
	 * Set the boot time.  If the server
	 * has been restarted quickly and has had the opportunity to
	 * service clients, then the start_time needs to be bumped
	 * regardless.  A small window but it exists...
	 */
	start_time = gethrestime_sec();
	if (nsrv4->rfs4_start_time < start_time)
		nsrv4->rfs4_start_time = start_time;
	else
		nsrv4->rfs4_start_time++;

	/*
	 * Create the first server instance, or a new one if the server has
	 * been restarted; see above comments on rfs4_start_time.  Don't
	 * start its grace period; that will be done later, to maximise the
	 * clients' recovery window.
	 */
	start_grace = 0;
	rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);

	/* reset the "first NFSv4 request" status */
	nsrv4->seen_first_compound = 0;

	mutex_enter(&nsrv4->state_lock);

	/*
	 * If the server state database has already been initialized,
	 * skip it
	 */
	if (nsrv4->nfs4_server_state != NULL) {
		mutex_exit(&nsrv4->state_lock);
		return;
	}

	rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);

	/*
	 * set the various cache timers for table creation; a value of 0
	 * (which is what rfs4_state_zone_fini() leaves behind) selects the
	 * compiled-in default
	 */
	if (nsrv4->rfs4_client_cache_time == 0)
		nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
	if (nsrv4->rfs4_openowner_cache_time == 0)
		nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
	if (nsrv4->rfs4_state_cache_time == 0)
		nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
	if (nsrv4->rfs4_lo_state_cache_time == 0)
		nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
	if (nsrv4->rfs4_lockowner_cache_time == 0)
		nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
	if (nsrv4->rfs4_file_cache_time == 0)
		nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
	if (nsrv4->rfs4_deleg_state_cache_time == 0)
		nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;

	/* Create the overall database to hold all server state */
	nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);

	/*
	 * Now create the individual tables.  Each cache time is scaled in
	 * place by rfs4_lease_time immediately before its table is created.
	 */

	/* Client table: indexed by nfs_client_id4 and by clientid */
	nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "Client",
	    nsrv4->rfs4_client_cache_time,
	    2,
	    rfs4_client_create,
	    rfs4_client_destroy,
	    rfs4_client_expiry,
	    sizeof (rfs4_client_t),
	    TABSIZE,
	    MAXTABSZ/8, 100);
	nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
	    "nfs_client_id4", nfsclnt_hash,
	    nfsclnt_compare, nfsclnt_mkkey,
	    TRUE);
	nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
	    "client_id", clientid_hash,
	    clientid_compare, clientid_mkkey,
	    FALSE);

	/* Client IP table: fixed cache time, not scaled by the lease time */
	nsrv4->rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
	nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "ClntIP",
	    nsrv4->rfs4_clntip_cache_time,
	    1,
	    rfs4_clntip_create,
	    rfs4_clntip_destroy,
	    rfs4_clntip_expiry,
	    sizeof (rfs4_clntip_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
	    "client_ip", clntip_hash,
	    clntip_compare, clntip_mkkey,
	    TRUE);

	/* Open owner table */
	nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "OpenOwner",
	    nsrv4->rfs4_openowner_cache_time,
	    1,
	    rfs4_openowner_create,
	    rfs4_openowner_destroy,
	    rfs4_openowner_expiry,
	    sizeof (rfs4_openowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
	    "open_owner4", openowner_hash,
	    openowner_compare,
	    openowner_mkkey, TRUE);

	/* Open state id table: three indexes */
	nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "OpenStateID",
	    nsrv4->rfs4_state_cache_time,
	    3,
	    rfs4_state_create,
	    rfs4_state_destroy,
	    rfs4_state_expiry,
	    sizeof (rfs4_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	/* CSTYLED */
	nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "Openowner-File",
	    state_owner_file_hash,
	    state_owner_file_compare,
	    state_owner_file_mkkey, TRUE);

	nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "State-id", state_hash,
	    state_compare, state_mkkey, FALSE);

	nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "File", state_file_hash,
	    state_file_compare, state_file_mkkey,
	    FALSE);

	/* Lock state id table */
	nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "LockStateID",
	    nsrv4->rfs4_lo_state_cache_time,
	    2,
	    rfs4_lo_state_create,
	    rfs4_lo_state_destroy,
	    rfs4_lo_state_expiry,
	    sizeof (rfs4_lo_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	/* CSTYLED */
	nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
	    "lockownerxstate",
	    lo_state_lo_hash,
	    lo_state_lo_compare,
	    lo_state_lo_mkkey, TRUE);

	nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
	    "State-id",
	    lo_state_hash, lo_state_compare,
	    lo_state_mkkey, FALSE);

	/* Lock owner table: indexed by lock_owner4 and by pid */
	nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;

	nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "Lockowner",
	    nsrv4->rfs4_lockowner_cache_time,
	    2,
	    rfs4_lockowner_create,
	    rfs4_lockowner_destroy,
	    rfs4_lockowner_expiry,
	    sizeof (rfs4_lockowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
	    "lock_owner4", lockowner_hash,
	    lockowner_compare,
	    lockowner_mkkey, TRUE);

	/* CSTYLED */
	nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
	    "pid", pid_hash,
	    pid_compare, pid_mkkey,
	    FALSE);

	/*
	 * File table: the only table created without an expiry callback
	 * (NULL) and with -1 as its final argument.
	 */
	nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "File",
	    nsrv4->rfs4_file_cache_time,
	    1,
	    rfs4_file_create,
	    rfs4_file_destroy,
	    NULL,
	    sizeof (rfs4_file_t),
	    TABSIZE,
	    MAXTABSZ, -1);

	nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
	    "Filehandle", file_hash,
	    file_compare, file_mkkey, TRUE);

	/* Delegation state table */
	nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
	/* CSTYLED */
	nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "DelegStateID",
	    nsrv4->rfs4_deleg_state_cache_time,
	    2,
	    rfs4_deleg_state_create,
	    rfs4_deleg_state_destroy,
	    rfs4_deleg_state_expiry,
	    sizeof (rfs4_deleg_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
	    "DelegByFileClient",
	    deleg_hash,
	    deleg_compare,
	    deleg_mkkey, TRUE);

	/* CSTYLED */
	nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
	    "DelegState",
	    deleg_state_hash,
	    deleg_state_compare,
	    deleg_state_mkkey, FALSE);

	mutex_exit(&nsrv4->state_lock);

	/*
	 * Init the stable storage.
	 */
	rfs4_ss_init(nsrv4);
}
1468
1469 /*
1470 * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1471 * and state.
1472 */
1473 void
1474 rfs4_state_zone_fini()
1475 {
1476 rfs4_database_t *dbp;
1477 nfs4_srv_t *nsrv4;
1478 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1479
1480 rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1481
1482 mutex_enter(&nsrv4->state_lock);
1483
1484 if (nsrv4->nfs4_server_state == NULL) {
1485 mutex_exit(&nsrv4->state_lock);
1486 return;
1487 }
1488
1489 /* destroy server instances and current instance ptr */
1490 rfs4_servinst_destroy_all(nsrv4);
1491
1492 /* reset the "first NFSv4 request" status */
1493 nsrv4->seen_first_compound = 0;
1494
1495 dbp = nsrv4->nfs4_server_state;
1496 nsrv4->nfs4_server_state = NULL;
1497
1498 rw_destroy(&nsrv4->rfs4_findclient_lock);
1499
1500 /* First stop all of the reaper threads in the database */
1501 rfs4_database_shutdown(dbp);
1502 /*
1503 * XXX workaround
1504 * Skip destrying the state database yet just in case there
1505 * are unfinished operations depending on it.
1506 */
1507 /* Now destroy/release the database tables */
1508 /* rfs4_database_destroy(dbp); */
1509
1510 /* Reset the cache timers for next time */
1511 nsrv4->rfs4_client_cache_time = 0;
1512 nsrv4->rfs4_openowner_cache_time = 0;
1513 nsrv4->rfs4_state_cache_time = 0;
1514 nsrv4->rfs4_lo_state_cache_time = 0;
1515 nsrv4->rfs4_lockowner_cache_time = 0;
1516 nsrv4->rfs4_file_cache_time = 0;
1517 nsrv4->rfs4_deleg_state_cache_time = 0;
1518
1519 mutex_exit(&nsrv4->state_lock);
1520
1521 /* clean up any dangling stable storage structures */
1522 rfs4_ss_fini(nsrv4);
1523 }
1524
/*
 * Server-private view of a clientid4: the same storage can be accessed
 * either as the clientid4 value handed to clients (id4) or as its two
 * 32-bit components (impl_id).
 */
typedef union {
	struct {
		uint32_t start_time;	/* set from nsrv4->rfs4_start_time */
		uint32_t c_id;		/* set from the client entry's dbe id */
	} impl_id;
	clientid4 id4;
} cid;

/* Cluster helpers operating on state ids / client ids; defined later */
static int foreign_stateid(stateid_t *id);
static int foreign_clientid(cid *cidp);
static void embed_nodeid(cid *cidp);

/*
 * Server-private view of the SETCLIENTID_CONFIRM verifier: the client's
 * c_id plus a generation number that rfs4_client_scv_next() increments.
 */
typedef union {
	struct {
		uint32_t c_id;
		uint32_t gen_num;
	} cv_impl;
	verifier4 confirm_verf;
} scid_confirm_verf;
1544
1545 static uint32_t
1546 clientid_hash(void *key)
1547 {
1548 cid *idp = key;
1549
1550 return (idp->impl_id.c_id);
1551 }
1552
1553 static bool_t
1554 clientid_compare(rfs4_entry_t entry, void *key)
1555 {
1556 rfs4_client_t *cp = (rfs4_client_t *)entry;
1557 clientid4 *idp = key;
1558
1559 return (*idp == cp->rc_clientid);
1560 }
1561
1562 static void *
1563 clientid_mkkey(rfs4_entry_t entry)
1564 {
1565 rfs4_client_t *cp = (rfs4_client_t *)entry;
1566
1567 return (&cp->rc_clientid);
1568 }
1569
1570 static uint32_t
1571 nfsclnt_hash(void *key)
1572 {
1573 nfs_client_id4 *client = key;
1574 int i;
1575 uint32_t hash = 0;
1576
1577 for (i = 0; i < client->id_len; i++) {
1578 hash <<= 1;
1579 hash += (uint_t)client->id_val[i];
1580 }
1581 return (hash);
1582 }
1583
1584
1585 static bool_t
1586 nfsclnt_compare(rfs4_entry_t entry, void *key)
1587 {
1588 rfs4_client_t *cp = (rfs4_client_t *)entry;
1589 nfs_client_id4 *nfs_client = key;
1590
1591 if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1592 return (FALSE);
1593
1594 return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1595 nfs_client->id_len) == 0);
1596 }
1597
1598 static void *
1599 nfsclnt_mkkey(rfs4_entry_t entry)
1600 {
1601 rfs4_client_t *cp = (rfs4_client_t *)entry;
1602
1603 return (&cp->rc_nfs_client);
1604 }
1605
1606 static bool_t
1607 rfs4_client_expiry(rfs4_entry_t u_entry)
1608 {
1609 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1610 bool_t cp_expired;
1611
1612 if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1613 cp->rc_ss_remove = 1;
1614 return (TRUE);
1615 }
1616 /*
1617 * If the sysadmin has used clear_locks for this
1618 * entry then forced_expire will be set and we
1619 * want this entry to be reaped. Or the entry
1620 * has exceeded its lease period.
1621 */
1622 cp_expired = (cp->rc_forced_expire ||
1623 (gethrestime_sec() - cp->rc_last_access
1624 > rfs4_lease_time));
1625
1626 if (!cp->rc_ss_remove && cp_expired)
1627 cp->rc_ss_remove = 1;
1628 return (cp_expired);
1629 }
1630
1631 /*
1632 * Remove the leaf file from all distributed stable storage paths.
1633 */
1634 static void
1635 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1636 {
1637 nfs4_srv_t *nsrv4;
1638 rfs4_servinst_t *sip;
1639 char *leaf = cp->rc_ss_pn->leaf;
1640
1641 /*
1642 * since the state files are written to all DSS
1643 * paths we must remove this leaf file instance
1644 * from all server instances.
1645 */
1646
1647 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1648 mutex_enter(&nsrv4->servinst_lock);
1649 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1650 /* remove the leaf file associated with this server instance */
1651 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1652 }
1653 mutex_exit(&nsrv4->servinst_lock);
1654 }
1655
1656 static void
1657 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1658 {
1659 int i, npaths = sip->dss_npaths;
1660
1661 for (i = 0; i < npaths; i++) {
1662 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1663 char *path, *dir;
1664 size_t pathlen;
1665
1666 /* the HA-NFSv4 path might have been failed-over away from us */
1667 if (dss_path == NULL)
1668 continue;
1669
1670 dir = dss_path->path;
1671
1672 /* allow 3 extra bytes for two '/' & a NUL */
1673 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1674 path = kmem_alloc(pathlen, KM_SLEEP);
1675 (void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1676
1677 (void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1678
1679 kmem_free(path, pathlen);
1680 }
1681 }
1682
1683 static void
1684 rfs4_client_destroy(rfs4_entry_t u_entry)
1685 {
1686 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1687
1688 mutex_destroy(cp->rc_cbinfo.cb_lock);
1689 cv_destroy(cp->rc_cbinfo.cb_cv);
1690 cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1691 list_destroy(&cp->rc_openownerlist);
1692
1693 /* free callback info */
1694 rfs4_cbinfo_free(&cp->rc_cbinfo);
1695
1696 if (cp->rc_cp_confirmed)
1697 rfs4_client_rele(cp->rc_cp_confirmed);
1698
1699 if (cp->rc_ss_pn) {
1700 /* check if the stable storage files need to be removed */
1701 if (cp->rc_ss_remove)
1702 rfs4_dss_remove_cpleaf(cp);
1703 rfs4_ss_pnfree(cp->rc_ss_pn);
1704 }
1705
1706 /* Free the client supplied client id */
1707 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1708
1709 if (cp->rc_sysidt != LM_NOSYSID)
1710 lm_free_sysidt(cp->rc_sysidt);
1711 }
1712
1713 static bool_t
1714 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1715 {
1716 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1717 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1718 struct sockaddr *ca;
1719 cid *cidp;
1720 scid_confirm_verf *scvp;
1721 nfs4_srv_t *nsrv4;
1722
1723 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1724
1725 /* Get a clientid to give to the client */
1726 cidp = (cid *)&cp->rc_clientid;
1727 cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1728 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1729
1730 /* If we are booted as a cluster node, embed our nodeid */
1731 if (cluster_bootflags & CLUSTER_BOOTED)
1732 embed_nodeid(cidp);
1733
1734 /* Allocate and copy client's client id value */
1735 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1736 cp->rc_nfs_client.id_len = client->id_len;
1737 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1738 cp->rc_nfs_client.verifier = client->verifier;
1739
1740 /* Copy client's IP address */
1741 ca = client->cl_addr;
1742 if (ca->sa_family == AF_INET)
1743 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1744 else if (ca->sa_family == AF_INET6)
1745 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1746 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1747
1748 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1749 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1750 scvp->cv_impl.c_id = cidp->impl_id.c_id;
1751 scvp->cv_impl.gen_num = 0;
1752
1753 /* An F_UNLKSYS has been done for this client */
1754 cp->rc_unlksys_completed = FALSE;
1755
1756 /* We need the client to ack us */
1757 cp->rc_need_confirm = TRUE;
1758 cp->rc_cp_confirmed = NULL;
1759
1760 /* TRUE all the time until the callback path actually fails */
1761 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1762
1763 /* Initialize the access time to now */
1764 cp->rc_last_access = gethrestime_sec();
1765
1766 cp->rc_cr_set = NULL;
1767
1768 cp->rc_sysidt = LM_NOSYSID;
1769
1770 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1771 offsetof(rfs4_openowner_t, ro_node));
1772
1773 /* set up the callback control structure */
1774 cp->rc_cbinfo.cb_state = CB_UNINIT;
1775 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1776 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1777 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1778
1779 /*
1780 * Associate the client_t with the current server instance.
1781 * The hold is solely to satisfy the calling requirement of
1782 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1783 */
1784 rfs4_dbe_hold(cp->rc_dbe);
1785 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1786 rfs4_dbe_rele(cp->rc_dbe);
1787
1788 return (TRUE);
1789 }
1790
1791 /*
1792 * Caller wants to generate/update the setclientid_confirm verifier
1793 * associated with a client. This is done during the SETCLIENTID
1794 * processing.
1795 */
1796 void
1797 rfs4_client_scv_next(rfs4_client_t *cp)
1798 {
1799 scid_confirm_verf *scvp;
1800
1801 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1802 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1803 scvp->cv_impl.gen_num++;
1804 }
1805
/*
 * Release a reference on a client entry by releasing its database
 * entry (rc_dbe).
 */
void
rfs4_client_rele(rfs4_client_t *cp)
{
	rfs4_dbe_rele(cp->rc_dbe);
}
1811
1812 rfs4_client_t *
1813 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1814 {
1815 rfs4_client_t *cp;
1816 nfs4_srv_t *nsrv4;
1817 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1818
1819
1820 if (oldcp) {
1821 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1822 rfs4_dbe_hide(oldcp->rc_dbe);
1823 } else {
1824 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1825 }
1826
1827 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1828 create, (void *)client, RFS4_DBS_VALID);
1829
1830 if (oldcp)
1831 rfs4_dbe_unhide(oldcp->rc_dbe);
1832
1833 rw_exit(&nsrv4->rfs4_findclient_lock);
1834
1835 return (cp);
1836 }
1837
1838 rfs4_client_t *
1839 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1840 {
1841 rfs4_client_t *cp;
1842 bool_t create = FALSE;
1843 cid *cidp = (cid *)&clientid;
1844 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1845
1846 /* If we're a cluster and the nodeid isn't right, short-circuit */
1847 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1848 return (NULL);
1849
1850 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1851
1852 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1853 &create, NULL, RFS4_DBS_VALID);
1854
1855 rw_exit(&nsrv4->rfs4_findclient_lock);
1856
1857 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1858 rfs4_client_rele(cp);
1859 return (NULL);
1860 } else {
1861 return (cp);
1862 }
1863 }
1864
1865 static uint32_t
1866 clntip_hash(void *key)
1867 {
1868 struct sockaddr *addr = key;
1869 int i, len = 0;
1870 uint32_t hash = 0;
1871 char *ptr;
1872
1873 if (addr->sa_family == AF_INET) {
1874 struct sockaddr_in *a = (struct sockaddr_in *)addr;
1875 len = sizeof (struct in_addr);
1876 ptr = (char *)&a->sin_addr;
1877 } else if (addr->sa_family == AF_INET6) {
1878 struct sockaddr_in6 *a = (struct sockaddr_in6 *)addr;
1879 len = sizeof (struct in6_addr);
1880 ptr = (char *)&a->sin6_addr;
1881 } else
1882 return (0);
1883
1884 for (i = 0; i < len; i++) {
1885 hash <<= 1;
1886 hash += (uint_t)ptr[i];
1887 }
1888 return (hash);
1889 }
1890
1891 static bool_t
1892 clntip_compare(rfs4_entry_t entry, void *key)
1893 {
1894 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1895 struct sockaddr *addr = key;
1896 int len = 0;
1897 char *p1, *p2;
1898
1899 if (addr->sa_family == AF_INET) {
1900 struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
1901 struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
1902 len = sizeof (struct in_addr);
1903 p1 = (char *)&a1->sin_addr;
1904 p2 = (char *)&a2->sin_addr;
1905 } else if (addr->sa_family == AF_INET6) {
1906 struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
1907 struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
1908 len = sizeof (struct in6_addr);
1909 p1 = (char *)&a1->sin6_addr;
1910 p2 = (char *)&a2->sin6_addr;
1911 } else
1912 return (0);
1913
1914 return (bcmp(p1, p2, len) == 0);
1915 }
1916
1917 static void *
1918 clntip_mkkey(rfs4_entry_t entry)
1919 {
1920 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1921
1922 return (&cp->ri_addr);
1923 }
1924
1925 static bool_t
1926 rfs4_clntip_expiry(rfs4_entry_t u_entry)
1927 {
1928 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1929
1930 if (rfs4_dbe_is_invalid(cp->ri_dbe))
1931 return (TRUE);
1932 return (FALSE);
1933 }
1934
/*
 * Destroy callback for the client IP table.  Intentionally empty:
 * rfs4_clntip_create() allocates nothing that would need releasing.
 */
/* ARGSUSED */
static void
rfs4_clntip_destroy(rfs4_entry_t u_entry)
{
}
1940
1941 static bool_t
1942 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
1943 {
1944 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1945 struct sockaddr *ca = (struct sockaddr *)arg;
1946
1947 /* Copy client's IP address */
1948 if (ca->sa_family == AF_INET)
1949 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1950 else if (ca->sa_family == AF_INET6)
1951 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1952 else
1953 return (FALSE);
1954 cp->ri_no_referrals = 1;
1955
1956 return (TRUE);
1957 }
1958
1959 rfs4_clntip_t *
1960 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1961 {
1962 rfs4_clntip_t *cp;
1963 nfs4_srv_t *nsrv4;
1964
1965 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1966
1967 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1968
1969 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1970 create, addr, RFS4_DBS_VALID);
1971
1972 rw_exit(&nsrv4->rfs4_findclient_lock);
1973
1974 return (cp);
1975 }
1976
1977 void
1978 rfs4_invalidate_clntip(struct sockaddr *addr)
1979 {
1980 rfs4_clntip_t *cp;
1981 bool_t create = FALSE;
1982 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1983
1984 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1985
1986 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1987 &create, NULL, RFS4_DBS_VALID);
1988 if (cp == NULL) {
1989 rw_exit(&nsrv4->rfs4_findclient_lock);
1990 return;
1991 }
1992 rfs4_dbe_invalidate(cp->ri_dbe);
1993 rfs4_dbe_rele(cp->ri_dbe);
1994
1995 rw_exit(&nsrv4->rfs4_findclient_lock);
1996 }
1997
1998 bool_t
1999 rfs4_lease_expired(rfs4_client_t *cp)
2000 {
2001 bool_t rc;
2002
2003 rfs4_dbe_lock(cp->rc_dbe);
2004
2005 /*
2006 * If the admin has executed clear_locks for this
2007 * client id, force expire will be set, so no need
2008 * to calculate anything because it's "outa here".
2009 */
2010 if (cp->rc_forced_expire) {
2011 rc = TRUE;
2012 } else {
2013 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2014 }
2015
2016 /*
2017 * If the lease has expired we will also want
2018 * to remove any stable storage state data. So
2019 * mark the client id accordingly.
2020 */
2021 if (!cp->rc_ss_remove)
2022 cp->rc_ss_remove = (rc == TRUE);
2023
2024 rfs4_dbe_unlock(cp->rc_dbe);
2025
2026 return (rc);
2027 }
2028
2029 void
2030 rfs4_update_lease(rfs4_client_t *cp)
2031 {
2032 rfs4_dbe_lock(cp->rc_dbe);
2033 if (!cp->rc_forced_expire)
2034 cp->rc_last_access = gethrestime_sec();
2035 rfs4_dbe_unlock(cp->rc_dbe);
2036 }
2037
2038
2039 static bool_t
2040 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
2041 {
2042 bool_t rc;
2043
2044 if (a->clientid != b->clientid)
2045 return (FALSE);
2046
2047 if (a->owner_len != b->owner_len)
2048 return (FALSE);
2049
2050 rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
2051
2052 return (rc);
2053 }
2054
2055 static uint_t
2056 openowner_hash(void *key)
2057 {
2058 int i;
2059 open_owner4 *openowner = key;
2060 uint_t hash = 0;
2061
2062 for (i = 0; i < openowner->owner_len; i++) {
2063 hash <<= 4;
2064 hash += (uint_t)openowner->owner_val[i];
2065 }
2066 hash += (uint_t)openowner->clientid;
2067 hash |= (openowner->clientid >> 32);
2068
2069 return (hash);
2070 }
2071
2072 static bool_t
2073 openowner_compare(rfs4_entry_t u_entry, void *key)
2074 {
2075 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2076 open_owner4 *arg = key;
2077
2078 return (EQOPENOWNER(&oo->ro_owner, arg));
2079 }
2080
2081 void *
2082 openowner_mkkey(rfs4_entry_t u_entry)
2083 {
2084 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2085
2086 return (&oo->ro_owner);
2087 }
2088
/*
 * Expiry callback for the open owner table.  Always reports the entry
 * as expired; no time-based check is made here.
 */
/* ARGSUSED */
static bool_t
rfs4_openowner_expiry(rfs4_entry_t u_entry)
{
	/* openstateid held us and did all needed delay */
	return (TRUE);
}
2096
2097 static void
2098 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2099 {
2100 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2101
2102 /* Remove open owner from client's lists of open owners */
2103 rfs4_dbe_lock(oo->ro_client->rc_dbe);
2104 list_remove(&oo->ro_client->rc_openownerlist, oo);
2105 rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2106
2107 /* One less reference to the client */
2108 rfs4_client_rele(oo->ro_client);
2109 oo->ro_client = NULL;
2110
2111 /* Free the last reply for this lock owner */
2112 rfs4_free_reply(&oo->ro_reply);
2113
2114 if (oo->ro_reply_fh.nfs_fh4_val) {
2115 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2116 oo->ro_reply_fh.nfs_fh4_len);
2117 oo->ro_reply_fh.nfs_fh4_val = NULL;
2118 oo->ro_reply_fh.nfs_fh4_len = 0;
2119 }
2120
2121 rfs4_sw_destroy(&oo->ro_sw);
2122 list_destroy(&oo->ro_statelist);
2123
2124 /* Free the lock owner id */
2125 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2126 }
2127
/*
 * Release a reference on an open owner entry by releasing its database
 * entry (ro_dbe).
 */
void
rfs4_openowner_rele(rfs4_openowner_t *oo)
{
	rfs4_dbe_rele(oo->ro_dbe);
}
2133
2134 static bool_t
2135 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2136 {
2137 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2138 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2139 open_owner4 *openowner = &argp->ro_owner;
2140 seqid4 seqid = argp->ro_open_seqid;
2141 rfs4_client_t *cp;
2142 bool_t create = FALSE;
2143 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2144
2145 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2146
2147 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2148 &openowner->clientid,
2149 &create, NULL, RFS4_DBS_VALID);
2150
2151 rw_exit(&nsrv4->rfs4_findclient_lock);
2152
2153 if (cp == NULL)
2154 return (FALSE);
2155
2156 oo->ro_reply_fh.nfs_fh4_len = 0;
2157 oo->ro_reply_fh.nfs_fh4_val = NULL;
2158
2159 oo->ro_owner.clientid = openowner->clientid;
2160 oo->ro_owner.owner_val =
2161 kmem_alloc(openowner->owner_len, KM_SLEEP);
2162
2163 bcopy(openowner->owner_val,
2164 oo->ro_owner.owner_val, openowner->owner_len);
2165
2166 oo->ro_owner.owner_len = openowner->owner_len;
2167
2168 oo->ro_need_confirm = TRUE;
2169
2170 rfs4_sw_init(&oo->ro_sw);
2171
2172 oo->ro_open_seqid = seqid;
2173 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2174 oo->ro_client = cp;
2175 oo->ro_cr_set = NULL;
2176
2177 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2178 offsetof(rfs4_state_t, rs_node));
2179
2180 /* Insert openowner into client's open owner list */
2181 rfs4_dbe_lock(cp->rc_dbe);
2182 list_insert_tail(&cp->rc_openownerlist, oo);
2183 rfs4_dbe_unlock(cp->rc_dbe);
2184
2185 return (TRUE);
2186 }
2187
2188 rfs4_openowner_t *
2189 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2190 {
2191 rfs4_openowner_t *oo;
2192 rfs4_openowner_t arg;
2193 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2194
2195 arg.ro_owner = *openowner;
2196 arg.ro_open_seqid = seqid;
2197 /* CSTYLED */
2198 oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2199 create, &arg, RFS4_DBS_VALID);
2200
2201 return (oo);
2202 }
2203
2204 void
2205 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2206 {
2207
2208 rfs4_dbe_lock(oo->ro_dbe);
2209
2210 oo->ro_open_seqid++;
2211
2212 rfs4_dbe_unlock(oo->ro_dbe);
2213 }
2214
2215 void
2216 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2217 {
2218
2219 rfs4_dbe_lock(oo->ro_dbe);
2220
2221 rfs4_free_reply(&oo->ro_reply);
2222
2223 rfs4_copy_reply(&oo->ro_reply, resp);
2224
2225 /* Save the filehandle if provided and free if not used */
2226 if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2227 fh && fh->nfs_fh4_len) {
2228 if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2229 oo->ro_reply_fh.nfs_fh4_val =
2230 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2231 nfs_fh4_copy(fh, &oo->ro_reply_fh);
2232 } else {
2233 if (oo->ro_reply_fh.nfs_fh4_val) {
2234 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2235 oo->ro_reply_fh.nfs_fh4_len);
2236 oo->ro_reply_fh.nfs_fh4_val = NULL;
2237 oo->ro_reply_fh.nfs_fh4_len = 0;
2238 }
2239 }
2240
2241 rfs4_dbe_unlock(oo->ro_dbe);
2242 }
2243
2244 static bool_t
2245 lockowner_compare(rfs4_entry_t u_entry, void *key)
2246 {
2247 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2248 lock_owner4 *b = (lock_owner4 *)key;
2249
2250 if (lo->rl_owner.clientid != b->clientid)
2251 return (FALSE);
2252
2253 if (lo->rl_owner.owner_len != b->owner_len)
2254 return (FALSE);
2255
2256 return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2257 lo->rl_owner.owner_len) == 0);
2258 }
2259
2260 void *
2261 lockowner_mkkey(rfs4_entry_t u_entry)
2262 {
2263 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2264
2265 return (&lo->rl_owner);
2266 }
2267
2268 static uint32_t
2269 lockowner_hash(void *key)
2270 {
2271 int i;
2272 lock_owner4 *lockowner = key;
2273 uint_t hash = 0;
2274
2275 for (i = 0; i < lockowner->owner_len; i++) {
2276 hash <<= 4;
2277 hash += (uint_t)lockowner->owner_val[i];
2278 }
2279 hash += (uint_t)lockowner->clientid;
2280 hash |= (lockowner->clientid >> 32);
2281
2282 return (hash);
2283 }
2284
/*
 * Hash a pid key.  The key is the pid value itself carried in a pointer;
 * the hash is that value truncated to 32 bits.
 */
static uint32_t
pid_hash(void *key)
{
	uintptr_t raw = (uintptr_t)key;

	return ((uint32_t)raw);
}
2290
2291 static void *
2292 pid_mkkey(rfs4_entry_t u_entry)
2293 {
2294 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2295
2296 return ((void *)(uintptr_t)lo->rl_pid);
2297 }
2298
2299 static bool_t
2300 pid_compare(rfs4_entry_t u_entry, void *key)
2301 {
2302 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2303
2304 return (lo->rl_pid == (pid_t)(uintptr_t)key);
2305 }
2306
2307 static void
2308 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2309 {
2310 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2311
2312 /* Free the lock owner id */
2313 kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2314 rfs4_client_rele(lo->rl_client);
2315 }
2316
2317 void
2318 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2319 {
2320 rfs4_dbe_rele(lo->rl_dbe);
2321 }
2322
2323 /* ARGSUSED */
2324 static bool_t
2325 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2326 {
2327 /*
2328 * Since expiry is called with no other references on
2329 * this struct, go ahead and have it removed.
2330 */
2331 return (TRUE);
2332 }
2333
2334 static bool_t
2335 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2336 {
2337 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2338 lock_owner4 *lockowner = (lock_owner4 *)arg;
2339 rfs4_client_t *cp;
2340 bool_t create = FALSE;
2341 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2342
2343 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2344
2345 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2346 &lockowner->clientid,
2347 &create, NULL, RFS4_DBS_VALID);
2348
2349 rw_exit(&nsrv4->rfs4_findclient_lock);
2350
2351 if (cp == NULL)
2352 return (FALSE);
2353
2354 /* Reference client */
2355 lo->rl_client = cp;
2356 lo->rl_owner.clientid = lockowner->clientid;
2357 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2358 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2359 lockowner->owner_len);
2360 lo->rl_owner.owner_len = lockowner->owner_len;
2361 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2362
2363 return (TRUE);
2364 }
2365
2366 rfs4_lockowner_t *
2367 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2368 {
2369 rfs4_lockowner_t *lo;
2370 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2371
2372 /* CSTYLED */
2373 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2374 create, lockowner, RFS4_DBS_VALID);
2375
2376 return (lo);
2377 }
2378
2379 rfs4_lockowner_t *
2380 rfs4_findlockowner_by_pid(pid_t pid)
2381 {
2382 rfs4_lockowner_t *lo;
2383 bool_t create = FALSE;
2384 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2385
2386 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2387 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2388
2389 return (lo);
2390 }
2391
2392
/*
 * Hash a file key by applying ADDRHASH() to the key pointer itself.
 */
static uint32_t
file_hash(void *key)
{
	uint32_t bucket = ADDRHASH(key);

	return (bucket);
}
2398
2399 static void *
2400 file_mkkey(rfs4_entry_t u_entry)
2401 {
2402 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2403
2404 return (fp->rf_vp);
2405 }
2406
2407 static bool_t
2408 file_compare(rfs4_entry_t u_entry, void *key)
2409 {
2410 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2411
2412 return (fp->rf_vp == (vnode_t *)key);
2413 }
2414
2415 static void
2416 rfs4_file_destroy(rfs4_entry_t u_entry)
2417 {
2418 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2419
2420 list_destroy(&fp->rf_delegstatelist);
2421
2422 if (fp->rf_filehandle.nfs_fh4_val)
2423 kmem_free(fp->rf_filehandle.nfs_fh4_val,
2424 fp->rf_filehandle.nfs_fh4_len);
2425 cv_destroy(fp->rf_dinfo.rd_recall_cv);
2426 if (fp->rf_vp) {
2427 vnode_t *vp = fp->rf_vp;
2428
2429 mutex_enter(&vp->v_vsd_lock);
2430 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
2431 mutex_exit(&vp->v_vsd_lock);
2432 VN_RELE(vp);
2433 fp->rf_vp = NULL;
2434 }
2435 rw_destroy(&fp->rf_file_rwlock);
2436 }
2437
2438 /*
2439 * Used to unlock the underlying dbe struct only
2440 */
2441 void
2442 rfs4_file_rele(rfs4_file_t *fp)
2443 {
2444 rfs4_dbe_rele(fp->rf_dbe);
2445 }
2446
2447 typedef struct {
2448 vnode_t *vp;
2449 nfs_fh4 *fh;
2450 } rfs4_fcreate_arg;
2451
2452 static bool_t
2453 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2454 {
2455 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2456 rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2457 vnode_t *vp = ap->vp;
2458 nfs_fh4 *fh = ap->fh;
2459
2460 VN_HOLD(vp);
2461
2462 fp->rf_filehandle.nfs_fh4_len = 0;
2463 fp->rf_filehandle.nfs_fh4_val = NULL;
2464 ASSERT(fh && fh->nfs_fh4_len);
2465 if (fh && fh->nfs_fh4_len) {
2466 fp->rf_filehandle.nfs_fh4_val =
2467 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2468 nfs_fh4_copy(fh, &fp->rf_filehandle);
2469 }
2470 fp->rf_vp = vp;
2471
2472 list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2473 offsetof(rfs4_deleg_state_t, rds_node));
2474
2475 fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2476 fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2477
2478 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2479 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2480
2481 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2482
2483 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2484
2485 mutex_enter(&vp->v_vsd_lock);
2486 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2487 mutex_exit(&vp->v_vsd_lock);
2488
2489 return (TRUE);
2490 }
2491
2492 rfs4_file_t *
2493 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2494 {
2495 rfs4_file_t *fp;
2496 rfs4_fcreate_arg arg;
2497 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2498
2499 arg.vp = vp;
2500 arg.fh = fh;
2501
2502 if (*create == TRUE)
2503 /* CSTYLED */
2504 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2505 &arg, RFS4_DBS_VALID);
2506 else {
2507 mutex_enter(&vp->v_vsd_lock);
2508 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2509 if (fp) {
2510 rfs4_dbe_lock(fp->rf_dbe);
2511 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2512 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2513 rfs4_dbe_unlock(fp->rf_dbe);
2514 fp = NULL;
2515 } else {
2516 rfs4_dbe_hold(fp->rf_dbe);
2517 rfs4_dbe_unlock(fp->rf_dbe);
2518 }
2519 }
2520 mutex_exit(&vp->v_vsd_lock);
2521 }
2522 return (fp);
2523 }
2524
2525 /*
2526 * Find a file in the db and once it is located, take the rw lock.
2527 * Need to check the vnode pointer and if it does not exist (it was
2528 * removed between the db location and check) redo the find. This
2529 * assumes that a file struct that has a NULL vnode pointer is marked
2530 * at 'invalid' and will not be found in the db the second time
2531 * around.
2532 */
2533 rfs4_file_t *
2534 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2535 {
2536 rfs4_file_t *fp;
2537 rfs4_fcreate_arg arg;
2538 bool_t screate = *create;
2539 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2540
2541 if (screate == FALSE) {
2542 mutex_enter(&vp->v_vsd_lock);
2543 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2544 if (fp) {
2545 rfs4_dbe_lock(fp->rf_dbe);
2546 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2547 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2548 rfs4_dbe_unlock(fp->rf_dbe);
2549 mutex_exit(&vp->v_vsd_lock);
2550 fp = NULL;
2551 } else {
2552 rfs4_dbe_hold(fp->rf_dbe);
2553 rfs4_dbe_unlock(fp->rf_dbe);
2554 mutex_exit(&vp->v_vsd_lock);
2555 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2556 if (fp->rf_vp == NULL) {
2557 rw_exit(&fp->rf_file_rwlock);
2558 rfs4_file_rele(fp);
2559 fp = NULL;
2560 }
2561 }
2562 } else {
2563 mutex_exit(&vp->v_vsd_lock);
2564 }
2565 } else {
2566 retry:
2567 arg.vp = vp;
2568 arg.fh = fh;
2569
2570 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2571 create, &arg, RFS4_DBS_VALID);
2572 if (fp != NULL) {
2573 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2574 if (fp->rf_vp == NULL) {
2575 rw_exit(&fp->rf_file_rwlock);
2576 rfs4_file_rele(fp);
2577 *create = screate;
2578 goto retry;
2579 }
2580 }
2581 }
2582
2583 return (fp);
2584 }
2585
2586 static uint32_t
2587 lo_state_hash(void *key)
2588 {
2589 stateid_t *id = key;
2590
2591 return (id->bits.ident+id->bits.pid);
2592 }
2593
2594 static bool_t
2595 lo_state_compare(rfs4_entry_t u_entry, void *key)
2596 {
2597 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2598 stateid_t *id = key;
2599 bool_t rc;
2600
2601 rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2602 lsp->rls_lockid.bits.type == id->bits.type &&
2603 lsp->rls_lockid.bits.ident == id->bits.ident &&
2604 lsp->rls_lockid.bits.pid == id->bits.pid);
2605
2606 return (rc);
2607 }
2608
2609 static void *
2610 lo_state_mkkey(rfs4_entry_t u_entry)
2611 {
2612 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2613
2614 return (&lsp->rls_lockid);
2615 }
2616
2617 static bool_t
2618 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2619 {
2620 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2621
2622 if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2623 return (TRUE);
2624 if (lsp->rls_state->rs_closed)
2625 return (TRUE);
2626 return ((gethrestime_sec() -
2627 lsp->rls_state->rs_owner->ro_client->rc_last_access
2628 > rfs4_lease_time));
2629 }
2630
2631 static void
2632 rfs4_lo_state_destroy(rfs4_entry_t u_entry)
2633 {
2634 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2635
2636 rfs4_dbe_lock(lsp->rls_state->rs_dbe);
2637 list_remove(&lsp->rls_state->rs_lostatelist, lsp);
2638 rfs4_dbe_unlock(lsp->rls_state->rs_dbe);
2639
2640 rfs4_sw_destroy(&lsp->rls_sw);
2641
2642 /* Make sure to release the file locks */
2643 if (lsp->rls_locks_cleaned == FALSE) {
2644 lsp->rls_locks_cleaned = TRUE;
2645 if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
2646 /* Is the PxFS kernel module loaded? */
2647 if (lm_remove_file_locks != NULL) {
2648 int new_sysid;
2649
2650 /* Encode the cluster nodeid in new sysid */
2651 new_sysid =
2652 lsp->rls_locker->rl_client->rc_sysidt;
2653 lm_set_nlmid_flk(&new_sysid);
2654
2655 /*
2656 * This PxFS routine removes file locks for a
2657 * client over all nodes of a cluster.
2658 */
2659 DTRACE_PROBE1(nfss_i_clust_rm_lck,
2660 int, new_sysid);
2661 (*lm_remove_file_locks)(new_sysid);
2662 } else {
2663 (void) cleanlocks(
2664 lsp->rls_state->rs_finfo->rf_vp,
2665 lsp->rls_locker->rl_pid,
2666 lsp->rls_locker->rl_client->rc_sysidt);
2667 }
2668 }
2669 }
2670
2671 /* Free the last reply for this state */
2672 rfs4_free_reply(&lsp->rls_reply);
2673
2674 rfs4_lockowner_rele(lsp->rls_locker);
2675 lsp->rls_locker = NULL;
2676
2677 rfs4_state_rele_nounlock(lsp->rls_state);
2678 lsp->rls_state = NULL;
2679 }
2680
2681 static bool_t
2682 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2683 {
2684 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2685 rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2686 rfs4_lockowner_t *lo = argp->rls_locker;
2687 rfs4_state_t *sp = argp->rls_state;
2688
2689 lsp->rls_state = sp;
2690
2691 lsp->rls_lockid = sp->rs_stateid;
2692 lsp->rls_lockid.bits.type = LOCKID;
2693 lsp->rls_lockid.bits.chgseq = 0;
2694 lsp->rls_lockid.bits.pid = lo->rl_pid;
2695
2696 lsp->rls_locks_cleaned = FALSE;
2697 lsp->rls_lock_completed = FALSE;
2698
2699 rfs4_sw_init(&lsp->rls_sw);
2700
2701 /* Attached the supplied lock owner */
2702 rfs4_dbe_hold(lo->rl_dbe);
2703 lsp->rls_locker = lo;
2704
2705 rfs4_dbe_lock(sp->rs_dbe);
2706 list_insert_tail(&sp->rs_lostatelist, lsp);
2707 rfs4_dbe_hold(sp->rs_dbe);
2708 rfs4_dbe_unlock(sp->rs_dbe);
2709
2710 return (TRUE);
2711 }
2712
2713 void
2714 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2715 {
2716 if (unlock_fp == TRUE)
2717 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2718 rfs4_dbe_rele(lsp->rls_dbe);
2719 }
2720
2721 static rfs4_lo_state_t *
2722 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2723 {
2724 rfs4_lo_state_t *lsp;
2725 bool_t create = FALSE;
2726 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2727
2728 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2729 &create, NULL, RFS4_DBS_VALID);
2730 if (lock_fp == TRUE && lsp != NULL)
2731 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2732
2733 return (lsp);
2734 }
2735
2736
2737 static uint32_t
2738 lo_state_lo_hash(void *key)
2739 {
2740 rfs4_lo_state_t *lsp = key;
2741
2742 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2743 }
2744
2745 static bool_t
2746 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2747 {
2748 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2749 rfs4_lo_state_t *keyp = key;
2750
2751 return (keyp->rls_locker == lsp->rls_locker &&
2752 keyp->rls_state == lsp->rls_state);
2753 }
2754
2755 static void *
2756 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2757 {
2758 return (u_entry);
2759 }
2760
2761 rfs4_lo_state_t *
2762 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2763 bool_t *create)
2764 {
2765 rfs4_lo_state_t *lsp;
2766 rfs4_lo_state_t arg;
2767 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2768
2769 arg.rls_locker = lo;
2770 arg.rls_state = sp;
2771
2772 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2773 &arg, create, &arg, RFS4_DBS_VALID);
2774
2775 return (lsp);
2776 }
2777
2778 static stateid_t
2779 get_stateid(id_t eid)
2780 {
2781 stateid_t id;
2782 nfs4_srv_t *nsrv4;
2783
2784 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
2785
2786 id.bits.boottime = nsrv4->rfs4_start_time;
2787 id.bits.ident = eid;
2788 id.bits.chgseq = 0;
2789 id.bits.type = 0;
2790 id.bits.pid = 0;
2791
2792 /*
2793 * If we are booted as a cluster node, embed our nodeid.
2794 * We've already done sanity checks in rfs4_client_create() so no
2795 * need to repeat them here.
2796 */
2797 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2798 clconf_get_nodeid() : 0;
2799
2800 return (id);
2801 }
2802
2803 /*
2804 * For use only when booted as a cluster node.
2805 * Returns TRUE if the embedded nodeid indicates that this stateid was
2806 * generated on another node.
2807 */
2808 static int
2809 foreign_stateid(stateid_t *id)
2810 {
2811 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2812 return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2813 }
2814
2815 /*
2816 * For use only when booted as a cluster node.
2817 * Returns TRUE if the embedded nodeid indicates that this clientid was
2818 * generated on another node.
2819 */
2820 static int
2821 foreign_clientid(cid *cidp)
2822 {
2823 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2824 return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2825 (uint32_t)clconf_get_nodeid());
2826 }
2827
2828 /*
2829 * For use only when booted as a cluster node.
2830 * Embed our cluster nodeid into the clientid.
2831 */
2832 static void
2833 embed_nodeid(cid *cidp)
2834 {
2835 int clnodeid;
2836 /*
2837 * Currently, our state tables are small enough that their
2838 * ids will leave enough bits free for the nodeid. If the
2839 * tables become larger, we mustn't overwrite the id.
2840 * Equally, we only have room for so many bits of nodeid, so
2841 * must check that too.
2842 */
2843 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2844 ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2845 clnodeid = clconf_get_nodeid();
2846 ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2847 ASSERT(clnodeid != NODEID_UNKNOWN);
2848 cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2849 }
2850
2851 static uint32_t
2852 state_hash(void *key)
2853 {
2854 stateid_t *ip = (stateid_t *)key;
2855
2856 return (ip->bits.ident);
2857 }
2858
2859 static bool_t
2860 state_compare(rfs4_entry_t u_entry, void *key)
2861 {
2862 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2863 stateid_t *id = (stateid_t *)key;
2864 bool_t rc;
2865
2866 rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2867 sp->rs_stateid.bits.ident == id->bits.ident);
2868
2869 return (rc);
2870 }
2871
2872 static void *
2873 state_mkkey(rfs4_entry_t u_entry)
2874 {
2875 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2876
2877 return (&sp->rs_stateid);
2878 }
2879
2880 static void
2881 rfs4_state_destroy(rfs4_entry_t u_entry)
2882 {
2883 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2884
2885 /* remove from openowner list */
2886 rfs4_dbe_lock(sp->rs_owner->ro_dbe);
2887 list_remove(&sp->rs_owner->ro_statelist, sp);
2888 rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
2889
2890 list_destroy(&sp->rs_lostatelist);
2891
2892 /* release any share locks for this stateid if it's still open */
2893 if (!sp->rs_closed) {
2894 rfs4_dbe_lock(sp->rs_dbe);
2895 (void) rfs4_unshare(sp);
2896 rfs4_dbe_unlock(sp->rs_dbe);
2897 }
2898
2899 /* Were done with the file */
2900 rfs4_file_rele(sp->rs_finfo);
2901 sp->rs_finfo = NULL;
2902
2903 /* And now with the openowner */
2904 rfs4_openowner_rele(sp->rs_owner);
2905 sp->rs_owner = NULL;
2906 }
2907
2908 static void
2909 rfs4_state_rele_nounlock(rfs4_state_t *sp)
2910 {
2911 rfs4_dbe_rele(sp->rs_dbe);
2912 }
2913
2914 void
2915 rfs4_state_rele(rfs4_state_t *sp)
2916 {
2917 rw_exit(&sp->rs_finfo->rf_file_rwlock);
2918 rfs4_dbe_rele(sp->rs_dbe);
2919 }
2920
2921 static uint32_t
2922 deleg_hash(void *key)
2923 {
2924 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
2925
2926 return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
2927 }
2928
2929 static bool_t
2930 deleg_compare(rfs4_entry_t u_entry, void *key)
2931 {
2932 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2933 rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
2934
2935 return (dsp->rds_client == kdsp->rds_client &&
2936 dsp->rds_finfo == kdsp->rds_finfo);
2937 }
2938
2939 static void *
2940 deleg_mkkey(rfs4_entry_t u_entry)
2941 {
2942 return (u_entry);
2943 }
2944
2945 static uint32_t
2946 deleg_state_hash(void *key)
2947 {
2948 stateid_t *ip = (stateid_t *)key;
2949
2950 return (ip->bits.ident);
2951 }
2952
2953 static bool_t
2954 deleg_state_compare(rfs4_entry_t u_entry, void *key)
2955 {
2956 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2957 stateid_t *id = (stateid_t *)key;
2958 bool_t rc;
2959
2960 if (id->bits.type != DELEGID)
2961 return (FALSE);
2962
2963 rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
2964 dsp->rds_delegid.bits.ident == id->bits.ident);
2965
2966 return (rc);
2967 }
2968
2969 static void *
2970 deleg_state_mkkey(rfs4_entry_t u_entry)
2971 {
2972 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2973
2974 return (&dsp->rds_delegid);
2975 }
2976
2977 static bool_t
2978 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
2979 {
2980 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2981
2982 if (rfs4_dbe_is_invalid(dsp->rds_dbe))
2983 return (TRUE);
2984
2985 if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
2986 return (TRUE);
2987
2988 if ((gethrestime_sec() - dsp->rds_client->rc_last_access
2989 > rfs4_lease_time)) {
2990 rfs4_dbe_invalidate(dsp->rds_dbe);
2991 return (TRUE);
2992 }
2993
2994 return (FALSE);
2995 }
2996
2997 static bool_t
2998 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
2999 {
3000 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3001 rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
3002 rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
3003
3004 rfs4_dbe_hold(fp->rf_dbe);
3005 rfs4_dbe_hold(cp->rc_dbe);
3006
3007 dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
3008 dsp->rds_delegid.bits.type = DELEGID;
3009 dsp->rds_finfo = fp;
3010 dsp->rds_client = cp;
3011 dsp->rds_dtype = OPEN_DELEGATE_NONE;
3012
3013 dsp->rds_time_granted = gethrestime_sec(); /* observability */
3014 dsp->rds_time_revoked = 0;
3015
3016 list_link_init(&dsp->rds_node);
3017
3018 return (TRUE);
3019 }
3020
3021 static void
3022 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3023 {
3024 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3025
3026 /* return delegation if necessary */
3027 rfs4_return_deleg(dsp, FALSE);
3028
3029 /* Were done with the file */
3030 rfs4_file_rele(dsp->rds_finfo);
3031 dsp->rds_finfo = NULL;
3032
3033 /* And now with the openowner */
3034 rfs4_client_rele(dsp->rds_client);
3035 dsp->rds_client = NULL;
3036 }
3037
3038 rfs4_deleg_state_t *
3039 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3040 {
3041 rfs4_deleg_state_t ds, *dsp;
3042 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3043
3044 ds.rds_client = sp->rs_owner->ro_client;
3045 ds.rds_finfo = sp->rs_finfo;
3046
3047 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3048 create, &ds, RFS4_DBS_VALID);
3049
3050 return (dsp);
3051 }
3052
3053 rfs4_deleg_state_t *
3054 rfs4_finddelegstate(stateid_t *id)
3055 {
3056 rfs4_deleg_state_t *dsp;
3057 bool_t create = FALSE;
3058 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3059
3060 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3061 id, &create, NULL, RFS4_DBS_VALID);
3062
3063 return (dsp);
3064 }
3065
3066 void
3067 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3068 {
3069 rfs4_dbe_rele(dsp->rds_dbe);
3070 }
3071
3072 void
3073 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3074 {
3075
3076 rfs4_dbe_lock(lsp->rls_dbe);
3077
3078 /*
3079 * If we are skipping sequence id checking, this means that
3080 * this is the first lock request and therefore the sequence
3081 * id does not need to be updated. This only happens on the
3082 * first lock request for a lockowner
3083 */
3084 if (!lsp->rls_skip_seqid_check)
3085 lsp->rls_seqid++;
3086
3087 rfs4_dbe_unlock(lsp->rls_dbe);
3088 }
3089
3090 void
3091 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3092 {
3093
3094 rfs4_dbe_lock(lsp->rls_dbe);
3095
3096 rfs4_free_reply(&lsp->rls_reply);
3097
3098 rfs4_copy_reply(&lsp->rls_reply, resp);
3099
3100 rfs4_dbe_unlock(lsp->rls_dbe);
3101 }
3102
3103 void
3104 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3105 bool_t close_of_client)
3106 {
3107 rfs4_state_t *sp;
3108
3109 rfs4_dbe_lock(oo->ro_dbe);
3110
3111 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3112 sp = list_next(&oo->ro_statelist, sp)) {
3113 rfs4_state_close(sp, FALSE, close_of_client, CRED());
3114 if (invalidate == TRUE)
3115 rfs4_dbe_invalidate(sp->rs_dbe);
3116 }
3117
3118 rfs4_dbe_invalidate(oo->ro_dbe);
3119 rfs4_dbe_unlock(oo->ro_dbe);
3120 }
3121
3122 static uint32_t
3123 state_owner_file_hash(void *key)
3124 {
3125 rfs4_state_t *sp = key;
3126
3127 return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3128 }
3129
3130 static bool_t
3131 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3132 {
3133 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3134 rfs4_state_t *arg = key;
3135
3136 if (sp->rs_closed == TRUE)
3137 return (FALSE);
3138
3139 return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3140 }
3141
3142 static void *
3143 state_owner_file_mkkey(rfs4_entry_t u_entry)
3144 {
3145 return (u_entry);
3146 }
3147
/*
 * Hash a file-pointer key by applying ADDRHASH() to the key pointer.
 */
static uint32_t
state_file_hash(void *key)
{
	uint32_t bucket = ADDRHASH(key);

	return (bucket);
}
3153
3154 static bool_t
3155 state_file_compare(rfs4_entry_t u_entry, void *key)
3156 {
3157 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3158 rfs4_file_t *fp = key;
3159
3160 if (sp->rs_closed == TRUE)
3161 return (FALSE);
3162
3163 return (fp == sp->rs_finfo);
3164 }
3165
3166 static void *
3167 state_file_mkkey(rfs4_entry_t u_entry)
3168 {
3169 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3170
3171 return (sp->rs_finfo);
3172 }
3173
3174 rfs4_state_t *
3175 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3176 bool_t *create)
3177 {
3178 rfs4_state_t *sp;
3179 rfs4_state_t key;
3180 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3181
3182 key.rs_owner = oo;
3183 key.rs_finfo = fp;
3184
3185 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3186 &key, create, &key, RFS4_DBS_VALID);
3187
3188 return (sp);
3189 }
3190
3191 /* This returns ANY state struct that refers to this file */
3192 static rfs4_state_t *
3193 rfs4_findstate_by_file(rfs4_file_t *fp)
3194 {
3195 bool_t create = FALSE;
3196 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3197
3198 return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3199 &create, fp, RFS4_DBS_VALID));
3200 }
3201
3202 static bool_t
3203 rfs4_state_expiry(rfs4_entry_t u_entry)
3204 {
3205 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3206
3207 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3208 return (TRUE);
3209
3210 if (sp->rs_closed == TRUE &&
3211 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3212 > rfs4_lease_time))
3213 return (TRUE);
3214
3215 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3216 > rfs4_lease_time));
3217 }
3218
3219 static bool_t
3220 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3221 {
3222 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3223 rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3224 rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3225
3226 rfs4_dbe_hold(fp->rf_dbe);
3227 rfs4_dbe_hold(oo->ro_dbe);
3228 sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3229 sp->rs_stateid.bits.type = OPENID;
3230 sp->rs_owner = oo;
3231 sp->rs_finfo = fp;
3232
3233 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3234 offsetof(rfs4_lo_state_t, rls_node));
3235
3236 /* Insert state on per open owner's list */
3237 rfs4_dbe_lock(oo->ro_dbe);
3238 list_insert_tail(&oo->ro_statelist, sp);
3239 rfs4_dbe_unlock(oo->ro_dbe);
3240
3241 return (TRUE);
3242 }
3243
3244 static rfs4_state_t *
3245 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3246 {
3247 rfs4_state_t *sp;
3248 bool_t create = FALSE;
3249 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3250
3251 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3252 &create, NULL, find_invalid);
3253 if (lock_fp == TRUE && sp != NULL)
3254 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3255
3256 return (sp);
3257 }
3258
3259 void
3260 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3261 cred_t *cr)
3262 {
3263 /* Remove the associated lo_state owners */
3264 if (!lock_held)
3265 rfs4_dbe_lock(sp->rs_dbe);
3266
3267 /*
3268 * If refcnt == 0, the dbe is about to be destroyed.
3269 * lock state will be released by the reaper thread.
3270 */
3271
3272 if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3273 if (sp->rs_closed == FALSE) {
3274 rfs4_release_share_lock_state(sp, cr, close_of_client);
3275 sp->rs_closed = TRUE;
3276 }
3277 }
3278
3279 if (!lock_held)
3280 rfs4_dbe_unlock(sp->rs_dbe);
3281 }
3282
3283 /*
3284 * Remove all state associated with the given client.
3285 */
3286 void
3287 rfs4_client_state_remove(rfs4_client_t *cp)
3288 {
3289 rfs4_openowner_t *oo;
3290
3291 rfs4_dbe_lock(cp->rc_dbe);
3292
3293 for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3294 oo = list_next(&cp->rc_openownerlist, oo)) {
3295 rfs4_free_opens(oo, TRUE, TRUE);
3296 }
3297
3298 rfs4_dbe_unlock(cp->rc_dbe);
3299 }
3300
3301 void
3302 rfs4_client_close(rfs4_client_t *cp)
3303 {
3304 /* Mark client as going away. */
3305 rfs4_dbe_lock(cp->rc_dbe);
3306 rfs4_dbe_invalidate(cp->rc_dbe);
3307 rfs4_dbe_unlock(cp->rc_dbe);
3308
3309 rfs4_client_state_remove(cp);
3310
3311 /* Release the client */
3312 rfs4_client_rele(cp);
3313 }
3314
3315 nfsstat4
3316 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3317 {
3318 cid *cidp = (cid *) cp;
3319 nfs4_srv_t *nsrv4;
3320
3321 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3322
3323 /*
3324 * If we are booted as a cluster node, check the embedded nodeid.
3325 * If it indicates that this clientid was generated on another node,
3326 * inform the client accordingly.
3327 */
3328 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3329 return (NFS4ERR_STALE_CLIENTID);
3330
3331 /*
3332 * If the server start time matches the time provided
3333 * by the client (via the clientid) and this is NOT a
3334 * setclientid_confirm then return EXPIRED.
3335 */
3336 if (!setclid_confirm &&
3337 cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3338 return (NFS4ERR_EXPIRED);
3339
3340 return (NFS4ERR_STALE_CLIENTID);
3341 }
3342
3343 /*
3344 * This is used when a stateid has not been found amongst the
3345 * current server's state. Check the stateid to see if it
3346 * was from this server instantiation or not.
3347 */
3348 static nfsstat4
3349 what_stateid_error(stateid_t *id, stateid_type_t type)
3350 {
3351 nfs4_srv_t *nsrv4;
3352
3353 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
3354
3355 /* If we are booted as a cluster node, was stateid locally generated? */
3356 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3357 return (NFS4ERR_STALE_STATEID);
3358
3359 /* If types don't match then no use checking further */
3360 if (type != id->bits.type)
3361 return (NFS4ERR_BAD_STATEID);
3362
3363 /* From a different server instantiation, return STALE */
3364 if (id->bits.boottime != nsrv4->rfs4_start_time)
3365 return (NFS4ERR_STALE_STATEID);
3366
3367 /*
3368 * From this server but the state is most likely beyond lease
3369 * timeout: return NFS4ERR_EXPIRED. However, there is the
3370 * case of a delegation stateid. For delegations, there is a
3371 * case where the state can be removed without the client's
3372 * knowledge/consent: revocation. In the case of delegation
3373 * revocation, the delegation state will be removed and will
3374 * not be found. If the client does something like a
3375 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3376 * that has been revoked, the server should return BAD_STATEID
3377 * instead of the more common EXPIRED error.
3378 */
3379 if (id->bits.boottime == nsrv4->rfs4_start_time) {
3380 if (type == DELEGID)
3381 return (NFS4ERR_BAD_STATEID);
3382 else
3383 return (NFS4ERR_EXPIRED);
3384 }
3385
3386 return (NFS4ERR_BAD_STATEID);
3387 }
3388
3389 /*
3390 * Used later on to find the various state structs. When called from
3391 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3392 * taken (it is not needed) and helps on the read/write path with
3393 * respect to performance.
3394 */
3395 static nfsstat4
3396 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3397 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3398 {
3399 stateid_t *id = (stateid_t *)stateid;
3400 rfs4_state_t *sp;
3401
3402 *spp = NULL;
3403
3404 /* If we are booted as a cluster node, was stateid locally generated? */
3405 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3406 return (NFS4ERR_STALE_STATEID);
3407
3408 sp = rfs4_findstate(id, find_invalid, lock_fp);
3409 if (sp == NULL) {
3410 return (what_stateid_error(id, OPENID));
3411 }
3412
3413 if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3414 if (lock_fp == TRUE)
3415 rfs4_state_rele(sp);
3416 else
3417 rfs4_state_rele_nounlock(sp);
3418 return (NFS4ERR_EXPIRED);
3419 }
3420
3421 *spp = sp;
3422
3423 return (NFS4_OK);
3424 }
3425
3426 nfsstat4
3427 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3428 rfs4_dbsearch_type_t find_invalid)
3429 {
3430 return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3431 }
3432
3433 int
3434 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid)
3435 {
3436 stateid_t *id = (stateid_t *)stateid;
3437
3438 if (rfs4_lease_expired(sp->rs_owner->ro_client))
3439 return (NFS4_CHECK_STATEID_EXPIRED);
3440
3441 /* Stateid is some time in the future - that's bad */
3442 if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3443 return (NFS4_CHECK_STATEID_BAD);
3444
3445 if (sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1)
3446 return (NFS4_CHECK_STATEID_REPLAY);
3447
3448 /* Stateid is some time in the past - that's old */
3449 if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3450 return (NFS4_CHECK_STATEID_OLD);
3451
3452 /* Caller needs to know about confirmation before closure */
3453 if (sp->rs_owner->ro_need_confirm)
3454 return (NFS4_CHECK_STATEID_UNCONFIRMED);
3455
3456 if (sp->rs_closed == TRUE)
3457 return (NFS4_CHECK_STATEID_CLOSED);
3458
3459 return (NFS4_CHECK_STATEID_OKAY);
3460 }
3461
3462 int
3463 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid)
3464 {
3465 stateid_t *id = (stateid_t *)stateid;
3466
3467 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3468 return (NFS4_CHECK_STATEID_EXPIRED);
3469
3470 /* Stateid is some time in the future - that's bad */
3471 if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3472 return (NFS4_CHECK_STATEID_BAD);
3473
3474 if (lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1)
3475 return (NFS4_CHECK_STATEID_REPLAY);
3476
3477 /* Stateid is some time in the past - that's old */
3478 if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3479 return (NFS4_CHECK_STATEID_OLD);
3480
3481 if (lsp->rls_state->rs_closed == TRUE)
3482 return (NFS4_CHECK_STATEID_CLOSED);
3483
3484 return (NFS4_CHECK_STATEID_OKAY);
3485 }
3486
3487 nfsstat4
3488 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3489 {
3490 stateid_t *id = (stateid_t *)stateid;
3491 rfs4_deleg_state_t *dsp;
3492
3493 *dspp = NULL;
3494
3495 /* If we are booted as a cluster node, was stateid locally generated? */
3496 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3497 return (NFS4ERR_STALE_STATEID);
3498
3499 dsp = rfs4_finddelegstate(id);
3500 if (dsp == NULL) {
3501 return (what_stateid_error(id, DELEGID));
3502 }
3503
3504 if (rfs4_lease_expired(dsp->rds_client)) {
3505 rfs4_deleg_state_rele(dsp);
3506 return (NFS4ERR_EXPIRED);
3507 }
3508
3509 *dspp = dsp;
3510
3511 return (NFS4_OK);
3512 }
3513
3514 nfsstat4
3515 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3516 {
3517 stateid_t *id = (stateid_t *)stateid;
3518 rfs4_lo_state_t *lsp;
3519
3520 *lspp = NULL;
3521
3522 /* If we are booted as a cluster node, was stateid locally generated? */
3523 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3524 return (NFS4ERR_STALE_STATEID);
3525
3526 lsp = rfs4_findlo_state(id, lock_fp);
3527 if (lsp == NULL) {
3528 return (what_stateid_error(id, LOCKID));
3529 }
3530
3531 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3532 rfs4_lo_state_rele(lsp, lock_fp);
3533 return (NFS4ERR_EXPIRED);
3534 }
3535
3536 *lspp = lsp;
3537
3538 return (NFS4_OK);
3539 }
3540
3541 static nfsstat4
3542 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3543 rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3544 {
3545 rfs4_state_t *sp = NULL;
3546 rfs4_deleg_state_t *dsp = NULL;
3547 rfs4_lo_state_t *lsp = NULL;
3548 stateid_t *id;
3549 nfsstat4 status;
3550
3551 *spp = NULL; *dspp = NULL; *lspp = NULL;
3552
3553 id = (stateid_t *)sid;
3554 switch (id->bits.type) {
3555 case OPENID:
3556 status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3557 break;
3558 case DELEGID:
3559 status = rfs4_get_deleg_state(sid, &dsp);
3560 break;
3561 case LOCKID:
3562 status = rfs4_get_lo_state(sid, &lsp, FALSE);
3563 if (status == NFS4_OK) {
3564 sp = lsp->rls_state;
3565 rfs4_dbe_hold(sp->rs_dbe);
3566 }
3567 break;
3568 default:
3569 status = NFS4ERR_BAD_STATEID;
3570 }
3571
3572 if (status == NFS4_OK) {
3573 *spp = sp;
3574 *dspp = dsp;
3575 *lspp = lsp;
3576 }
3577
3578 return (status);
3579 }
3580
3581 /*
3582 * Given the I/O mode (FREAD or FWRITE), this checks whether the
3583 * rfs4_state_t struct has access to do this operation and if so
3584 * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3585 */
3586 nfsstat4
3587 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3588 {
3589 nfsstat4 stat = NFS4_OK;
3590 rfs4_file_t *fp;
3591 bool_t create = FALSE;
3592
3593 rfs4_dbe_lock(sp->rs_dbe);
3594 if (mode == FWRITE) {
3595 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3596 stat = NFS4ERR_OPENMODE;
3597 }
3598 } else if (mode == FREAD) {
3599 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3600 /*
3601 * If we have OPENed the file with DENYing access
3602 * to both READ and WRITE then no one else could
3603 * have OPENed the file, hence no conflicting READ
3604 * deny. This check is merely an optimization.
3605 */
3606 if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3607 goto out;
3608
3609 /* Check against file struct's DENY mode */
3610 fp = rfs4_findfile(vp, NULL, &create);
3611 if (fp != NULL) {
3612 int deny_read = 0;
3613 rfs4_dbe_lock(fp->rf_dbe);
3614 /*
3615 * Check if any other open owner has the file
3616 * OPENed with deny READ.
3617 */
3618 if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3619 deny_read = 1;
3620 ASSERT(fp->rf_deny_read >= deny_read);
3621 if (fp->rf_deny_read > deny_read)
3622 stat = NFS4ERR_OPENMODE;
3623 rfs4_dbe_unlock(fp->rf_dbe);
3624 rfs4_file_rele(fp);
3625 }
3626 }
3627 } else {
3628 /* Illegal I/O mode */
3629 stat = NFS4ERR_INVAL;
3630 }
3631 out:
3632 rfs4_dbe_unlock(sp->rs_dbe);
3633 return (stat);
3634 }
3635
3636 /*
3637 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3638 * the file is being truncated, return NFS4_OK if allowed or appropriate
3639 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3640 * the associated file will be done if the I/O is not consistent with any
3641 * delegation in effect on the file. Should be holding VOP_RWLOCK, either
3642 * as reader or writer as appropriate. rfs4_op_open will acquire the
3643 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
3644 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3645 * deleg parameter, we will return whether a write delegation is held by
3646 * the client associated with this stateid.
3647 * If the server instance associated with the relevant client is in its
3648 * grace period, return NFS4ERR_GRACE.
3649 */
3650
3651 nfsstat4
3652 rfs4_check_stateid(int mode, vnode_t *vp,
3653 stateid4 *stateid, bool_t trunc, bool_t *deleg,
3654 bool_t do_access, caller_context_t *ct)
3655 {
3656 rfs4_file_t *fp;
3657 bool_t create = FALSE;
3658 rfs4_state_t *sp;
3659 rfs4_deleg_state_t *dsp;
3660 rfs4_lo_state_t *lsp;
3661 stateid_t *id = (stateid_t *)stateid;
3662 nfsstat4 stat = NFS4_OK;
3663
3664 if (ct != NULL) {
3665 ct->cc_sysid = 0;
3666 ct->cc_pid = 0;
3667 ct->cc_caller_id = nfs4_srv_caller_id;
3668 ct->cc_flags = CC_DONTBLOCK;
3669 }
3670
3671 if (ISSPECIAL(stateid)) {
3672 fp = rfs4_findfile(vp, NULL, &create);
3673 if (fp == NULL)
3674 return (NFS4_OK);
3675 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
3676 rfs4_file_rele(fp);
3677 return (NFS4_OK);
3678 }
3679 if (mode == FWRITE ||
3680 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
3681 rfs4_recall_deleg(fp, trunc, NULL);
3682 rfs4_file_rele(fp);
3683 return (NFS4ERR_DELAY);
3684 }
3685 rfs4_file_rele(fp);
3686 return (NFS4_OK);
3687 } else {
3688 stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
3689 if (stat != NFS4_OK)
3690 return (stat);
3691 if (lsp != NULL) {
3692 /* Is associated server instance in its grace period? */
3693 if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
3694 rfs4_lo_state_rele(lsp, FALSE);
3695 if (sp != NULL)
3696 rfs4_state_rele_nounlock(sp);
3697 return (NFS4ERR_GRACE);
3698 }
3699 if (id->bits.type == LOCKID) {
3700 /* Seqid in the future? - that's bad */
3701 if (lsp->rls_lockid.bits.chgseq <
3702 id->bits.chgseq) {
3703 rfs4_lo_state_rele(lsp, FALSE);
3704 if (sp != NULL)
3705 rfs4_state_rele_nounlock(sp);
3706 return (NFS4ERR_BAD_STATEID);
3707 }
3708 /* Seqid in the past? - that's old */
3709 if (lsp->rls_lockid.bits.chgseq >
3710 id->bits.chgseq) {
3711 rfs4_lo_state_rele(lsp, FALSE);
3712 if (sp != NULL)
3713 rfs4_state_rele_nounlock(sp);
3714 return (NFS4ERR_OLD_STATEID);
3715 }
3716 /* Ensure specified filehandle matches */
3717 if (lsp->rls_state->rs_finfo->rf_vp != vp) {
3718 rfs4_lo_state_rele(lsp, FALSE);
3719 if (sp != NULL)
3720 rfs4_state_rele_nounlock(sp);
3721 return (NFS4ERR_BAD_STATEID);
3722 }
3723 }
3724 if (ct != NULL) {
3725 ct->cc_sysid =
3726 lsp->rls_locker->rl_client->rc_sysidt;
3727 ct->cc_pid = lsp->rls_locker->rl_pid;
3728 }
3729 rfs4_lo_state_rele(lsp, FALSE);
3730 }
3731
3732 /* Stateid provided was an "open" stateid */
3733 if (sp != NULL) {
3734 /* Is associated server instance in its grace period? */
3735 if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
3736 rfs4_state_rele_nounlock(sp);
3737 return (NFS4ERR_GRACE);
3738 }
3739 if (id->bits.type == OPENID) {
3740 /* Seqid in the future? - that's bad */
3741 if (sp->rs_stateid.bits.chgseq <
3742 id->bits.chgseq) {
3743 rfs4_state_rele_nounlock(sp);
3744 return (NFS4ERR_BAD_STATEID);
3745 }
3746 /* Seqid in the past - that's old */
3747 if (sp->rs_stateid.bits.chgseq >
3748 id->bits.chgseq) {
3749 rfs4_state_rele_nounlock(sp);
3750 return (NFS4ERR_OLD_STATEID);
3751 }
3752 }
3753 /* Ensure specified filehandle matches */
3754 if (sp->rs_finfo->rf_vp != vp) {
3755 rfs4_state_rele_nounlock(sp);
3756 return (NFS4ERR_BAD_STATEID);
3757 }
3758
3759 if (sp->rs_owner->ro_need_confirm) {
3760 rfs4_state_rele_nounlock(sp);
3761 return (NFS4ERR_BAD_STATEID);
3762 }
3763
3764 if (sp->rs_closed == TRUE) {
3765 rfs4_state_rele_nounlock(sp);
3766 return (NFS4ERR_OLD_STATEID);
3767 }
3768
3769 if (do_access)
3770 stat = rfs4_state_has_access(sp, mode, vp);
3771 else
3772 stat = NFS4_OK;
3773
3774 /*
3775 * Return whether this state has write
3776 * delegation if desired
3777 */
3778 if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
3779 OPEN_DELEGATE_WRITE))
3780 *deleg = TRUE;
3781
3782 /*
3783 * We got a valid stateid, so we update the
3784 * lease on the client. Ideally we would like
3785 * to do this after the calling op succeeds,
3786 * but for now this will be good
3787 * enough. Callers of this routine are
3788 * currently insulated from the state stuff.
3789 */
3790 rfs4_update_lease(sp->rs_owner->ro_client);
3791
3792 /*
3793 * If a delegation is present on this file and
3794 * this is a WRITE, then update the lastwrite
3795 * time to indicate that activity is present.
3796 */
3797 if (sp->rs_finfo->rf_dinfo.rd_dtype ==
3798 OPEN_DELEGATE_WRITE &&
3799 mode == FWRITE) {
3800 sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
3801 gethrestime_sec();
3802 }
3803
3804 rfs4_state_rele_nounlock(sp);
3805
3806 return (stat);
3807 }
3808
3809 if (dsp != NULL) {
3810 /* Is associated server instance in its grace period? */
3811 if (rfs4_clnt_in_grace(dsp->rds_client)) {
3812 rfs4_deleg_state_rele(dsp);
3813 return (NFS4ERR_GRACE);
3814 }
3815 if (dsp->rds_delegid.bits.chgseq != id->bits.chgseq) {
3816 rfs4_deleg_state_rele(dsp);
3817 return (NFS4ERR_BAD_STATEID);
3818 }
3819
3820 /* Ensure specified filehandle matches */
3821 if (dsp->rds_finfo->rf_vp != vp) {
3822 rfs4_deleg_state_rele(dsp);
3823 return (NFS4ERR_BAD_STATEID);
3824 }
3825 /*
3826 * Return whether this state has write
3827 * delegation if desired
3828 */
3829 if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3830 OPEN_DELEGATE_WRITE))
3831 *deleg = TRUE;
3832
3833 rfs4_update_lease(dsp->rds_client);
3834
3835 /*
3836 * If a delegation is present on this file and
3837 * this is a WRITE, then update the lastwrite
3838 * time to indicate that activity is present.
3839 */
3840 if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3841 OPEN_DELEGATE_WRITE && mode == FWRITE) {
3842 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
3843 gethrestime_sec();
3844 }
3845
3846 /*
3847 * XXX - what happens if this is a WRITE and the
3848 * delegation type of for READ.
3849 */
3850 rfs4_deleg_state_rele(dsp);
3851
3852 return (stat);
3853 }
3854 /*
3855 * If we got this far, something bad happened
3856 */
3857 return (NFS4ERR_BAD_STATEID);
3858 }
3859 }
3860
3861
3862 /*
3863 * This is a special function in that for the file struct provided the
3864 * server wants to remove/close all current state associated with the
3865 * file. The prime use of this would be with OP_REMOVE to force the
3866 * release of state and particularly of file locks.
3867 *
3868 * There is an assumption that there is no delegations outstanding on
3869 * this file at this point. The caller should have waited for those
3870 * to be returned or revoked.
3871 */
3872 void
3873 rfs4_close_all_state(rfs4_file_t *fp)
3874 {
3875 rfs4_state_t *sp;
3876
3877 rfs4_dbe_lock(fp->rf_dbe);
3878
3879 #ifdef DEBUG
3880 /* only applies when server is handing out delegations */
3881 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3882 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3883 #endif
3884
3885 /* No delegations for this file */
3886 ASSERT(list_is_empty(&fp->rf_delegstatelist));
3887
3888 /* Make sure that it can not be found */
3889 rfs4_dbe_invalidate(fp->rf_dbe);
3890
3891 if (fp->rf_vp == NULL) {
3892 rfs4_dbe_unlock(fp->rf_dbe);
3893 return;
3894 }
3895 rfs4_dbe_unlock(fp->rf_dbe);
3896
3897 /*
3898 * Hold as writer to prevent other server threads from
3899 * processing requests related to the file while all state is
3900 * being removed.
3901 */
3902 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
3903
3904 /* Remove ALL state from the file */
3905 while (sp = rfs4_findstate_by_file(fp)) {
3906 rfs4_state_close(sp, FALSE, FALSE, CRED());
3907 rfs4_state_rele_nounlock(sp);
3908 }
3909
3910 /*
3911 * This is only safe since there are no further references to
3912 * the file.
3913 */
3914 rfs4_dbe_lock(fp->rf_dbe);
3915 if (fp->rf_vp) {
3916 vnode_t *vp = fp->rf_vp;
3917
3918 mutex_enter(&vp->v_vsd_lock);
3919 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
3920 mutex_exit(&vp->v_vsd_lock);
3921 VN_RELE(vp);
3922 fp->rf_vp = NULL;
3923 }
3924 rfs4_dbe_unlock(fp->rf_dbe);
3925
3926 /* Finally let other references to proceed */
3927 rw_exit(&fp->rf_file_rwlock);
3928 }
3929
3930 /*
3931 * This function is used as a target for the rfs4_dbe_walk() call
3932 * below. The purpose of this function is to see if the
3933 * lockowner_state refers to a file that resides within the exportinfo
3934 * export. If so, then remove the lock_owner state (file locks and
3935 * share "locks") for this object since the intent is the server is
3936 * unexporting the specified directory. Be sure to invalidate the
3937 * object after the state has been released
3938 */
3939 static void
3940 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
3941 {
3942 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
3943 struct exportinfo *exi = (struct exportinfo *)e;
3944 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
3945 fhandle_t *efhp;
3946
3947 efhp = (fhandle_t *)&exi->exi_fh;
3948 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3949
3950 FH_TO_FMT4(efhp, exi_fhp);
3951
3952 finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
3953 rf_filehandle.nfs_fh4_val;
3954
3955 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
3956 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
3957 exi_fhp->fh4_xlen) == 0) {
3958 rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
3959 rfs4_dbe_invalidate(lsp->rls_dbe);
3960 rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
3961 }
3962 }
3963
3964 /*
3965 * This function is used as a target for the rfs4_dbe_walk() call
3966 * below. The purpose of this function is to see if the state refers
3967 * to a file that resides within the exportinfo export. If so, then
3968 * remove the open state for this object since the intent is the
3969 * server is unexporting the specified directory. The main result for
3970 * this type of entry is to invalidate it such it will not be found in
3971 * the future.
3972 */
3973 static void
3974 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
3975 {
3976 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3977 struct exportinfo *exi = (struct exportinfo *)e;
3978 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
3979 fhandle_t *efhp;
3980
3981 efhp = (fhandle_t *)&exi->exi_fh;
3982 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3983
3984 FH_TO_FMT4(efhp, exi_fhp);
3985
3986 finfo_fhp =
3987 (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
3988
3989 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
3990 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
3991 exi_fhp->fh4_xlen) == 0) {
3992 rfs4_state_close(sp, TRUE, FALSE, CRED());
3993 rfs4_dbe_invalidate(sp->rs_dbe);
3994 }
3995 }
3996
3997 /*
3998 * This function is used as a target for the rfs4_dbe_walk() call
3999 * below. The purpose of this function is to see if the state refers
4000 * to a file that resides within the exportinfo export. If so, then
4001 * remove the deleg state for this object since the intent is the
4002 * server is unexporting the specified directory. The main result for
4003 * this type of entry is to invalidate it such it will not be found in
4004 * the future.
4005 */
4006 static void
4007 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
4008 {
4009 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
4010 struct exportinfo *exi = (struct exportinfo *)e;
4011 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4012 fhandle_t *efhp;
4013
4014 efhp = (fhandle_t *)&exi->exi_fh;
4015 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4016
4017 FH_TO_FMT4(efhp, exi_fhp);
4018
4019 finfo_fhp =
4020 (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
4021
4022 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4023 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4024 exi_fhp->fh4_xlen) == 0) {
4025 rfs4_dbe_invalidate(dsp->rds_dbe);
4026 }
4027 }
4028
4029 /*
4030 * This function is used as a target for the rfs4_dbe_walk() call
4031 * below. The purpose of this function is to see if the state refers
4032 * to a file that resides within the exportinfo export. If so, then
4033 * release vnode hold for this object since the intent is the server
4034 * is unexporting the specified directory. Invalidation will prevent
4035 * this struct from being found in the future.
4036 */
4037 static void
4038 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
4039 {
4040 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
4041 struct exportinfo *exi = (struct exportinfo *)e;
4042 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4043 fhandle_t *efhp;
4044
4045 efhp = (fhandle_t *)&exi->exi_fh;
4046 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4047
4048 FH_TO_FMT4(efhp, exi_fhp);
4049
4050 finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
4051
4052 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4053 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4054 exi_fhp->fh4_xlen) == 0) {
4055 if (fp->rf_vp) {
4056 vnode_t *vp = fp->rf_vp;
4057
4058 /*
4059 * don't leak monitors and remove the reference
4060 * put on the vnode when the delegation was granted.
4061 */
4062 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
4063 (void) fem_uninstall(vp, deleg_rdops,
4064 (void *)fp);
4065 vn_open_downgrade(vp, FREAD);
4066 } else if (fp->rf_dinfo.rd_dtype ==
4067 OPEN_DELEGATE_WRITE) {
4068 (void) fem_uninstall(vp, deleg_wrops,
4069 (void *)fp);
4070 vn_open_downgrade(vp, FREAD|FWRITE);
4071 }
4072 mutex_enter(&vp->v_vsd_lock);
4073 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4074 mutex_exit(&vp->v_vsd_lock);
4075 VN_RELE(vp);
4076 fp->rf_vp = NULL;
4077 }
4078 rfs4_dbe_invalidate(fp->rf_dbe);
4079 }
4080 }
4081
4082 /*
4083 * Given a directory that is being unexported, cleanup/release all
4084 * state in the server that refers to objects residing underneath this
4085 * particular export. The ordering of the release is important.
4086 * Lock_owner, then state and then file.
4087 */
4088 void
4089 rfs4_clean_state_exi(struct exportinfo *exi)
4090 {
4091 nfs4_srv_t *nsrv4;
4092
4093 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
4094 mutex_enter(&nsrv4->state_lock);
4095
4096 if (nsrv4->nfs4_server_state == NULL) {
4097 mutex_exit(&nsrv4->state_lock);
4098 return;
4099 }
4100
4101 /* CSTYLED */
4102 rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4103 rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4104 /* CSTYLED */
4105 rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4106 rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4107
4108 mutex_exit(&nsrv4->state_lock);
4109 }