Print this page
Dan mods to NFS design problems re. multiple zone keys
Do rfs4_ss_fini() BEFORE rfs4_servinst_destroy_all().
Go ahead and destroy the NFSv4 database tables
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_state.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_state.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright 2018 Nexenta Systems, Inc.
28 28 * Copyright 2019 Nexenta by DDN, Inc.
29 29 */
30 30
31 31 #include <sys/systm.h>
32 32 #include <sys/kmem.h>
33 33 #include <sys/cmn_err.h>
34 34 #include <sys/atomic.h>
35 35 #include <sys/clconf.h>
36 36 #include <sys/cladm.h>
37 37 #include <sys/flock.h>
38 38 #include <nfs/export.h>
39 39 #include <nfs/nfs.h>
40 40 #include <nfs/nfs4.h>
41 41 #include <nfs/nfssys.h>
42 42 #include <nfs/lm.h>
43 43 #include <sys/pathname.h>
44 44 #include <sys/sdt.h>
45 45 #include <sys/nvpair.h>
46 46
47 47 extern u_longlong_t nfs4_srv_caller_id;
48 48
49 49 extern uint_t nfs4_srv_vkey;
50 50
51 51 stateid4 special0 = {
52 52 0,
53 53 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
54 54 };
55 55
56 56 stateid4 special1 = {
57 57 0xffffffff,
58 58 {
59 59 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
60 60 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
61 61 (char)0xff, (char)0xff, (char)0xff, (char)0xff
62 62 }
63 63 };
64 64
65 65
66 66 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \
67 67 stateid4_cmp(id, &special1))
68 68
69 69 /* For embedding the cluster nodeid into our clientid */
70 70 #define CLUSTER_NODEID_SHIFT 24
71 71 #define CLUSTER_MAX_NODEID 255
72 72
73 73 #ifdef DEBUG
74 74 int rfs4_debug;
75 75 #endif
76 76
77 77 static uint32_t rfs4_database_debug = 0x00;
78 78
79 79 /* CSTYLED */
80 80 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
81 81 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
82 82 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
83 83 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
84 84
85 85 /*
86 86 * Couple of simple init/destroy functions for a general waiter
87 87 */
88 88 void
89 89 rfs4_sw_init(rfs4_state_wait_t *swp)
90 90 {
91 91 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
92 92 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
93 93 swp->sw_active = FALSE;
94 94 swp->sw_wait_count = 0;
95 95 }
96 96
97 97 void
98 98 rfs4_sw_destroy(rfs4_state_wait_t *swp)
99 99 {
100 100 mutex_destroy(swp->sw_cv_lock);
101 101 cv_destroy(swp->sw_cv);
102 102 }
103 103
104 104 void
105 105 rfs4_sw_enter(rfs4_state_wait_t *swp)
106 106 {
107 107 mutex_enter(swp->sw_cv_lock);
108 108 while (swp->sw_active) {
109 109 swp->sw_wait_count++;
110 110 cv_wait(swp->sw_cv, swp->sw_cv_lock);
111 111 swp->sw_wait_count--;
112 112 }
113 113 ASSERT(swp->sw_active == FALSE);
114 114 swp->sw_active = TRUE;
115 115 mutex_exit(swp->sw_cv_lock);
116 116 }
117 117
118 118 void
119 119 rfs4_sw_exit(rfs4_state_wait_t *swp)
120 120 {
121 121 mutex_enter(swp->sw_cv_lock);
122 122 ASSERT(swp->sw_active == TRUE);
123 123 swp->sw_active = FALSE;
124 124 if (swp->sw_wait_count != 0)
125 125 cv_broadcast(swp->sw_cv);
126 126 mutex_exit(swp->sw_cv_lock);
127 127 }
128 128
129 129 static void
130 130 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
131 131 {
132 132 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
133 133 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
134 134
135 135 if (sres->status == NFS4ERR_DENIED) {
136 136 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
137 137 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
138 138 }
139 139 }
140 140
141 141 /*
142 142 * CPR callback id -- not related to v4 callbacks
143 143 */
144 144 static callb_id_t cpr_id = 0;
145 145
146 146 static void
147 147 deep_lock_free(LOCK4res *res)
148 148 {
149 149 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
150 150
151 151 if (res->status == NFS4ERR_DENIED)
152 152 kmem_free(lo->owner_val, lo->owner_len);
153 153 }
154 154
155 155 static void
156 156 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
157 157 {
158 158 nfsace4 *sacep, *dacep;
159 159
160 160 if (sres->status != NFS4_OK) {
161 161 return;
162 162 }
163 163
164 164 dres->attrset = sres->attrset;
165 165
166 166 switch (sres->delegation.delegation_type) {
167 167 case OPEN_DELEGATE_NONE:
168 168 return;
169 169 case OPEN_DELEGATE_READ:
170 170 sacep = &sres->delegation.open_delegation4_u.read.permissions;
171 171 dacep = &dres->delegation.open_delegation4_u.read.permissions;
172 172 break;
173 173 case OPEN_DELEGATE_WRITE:
174 174 sacep = &sres->delegation.open_delegation4_u.write.permissions;
175 175 dacep = &dres->delegation.open_delegation4_u.write.permissions;
176 176 break;
177 177 }
178 178 dacep->who.utf8string_val =
179 179 kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
180 180 bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
181 181 sacep->who.utf8string_len);
182 182 }
183 183
184 184 static void
185 185 deep_open_free(OPEN4res *res)
186 186 {
187 187 nfsace4 *acep;
188 188 if (res->status != NFS4_OK)
189 189 return;
190 190
191 191 switch (res->delegation.delegation_type) {
192 192 case OPEN_DELEGATE_NONE:
193 193 return;
194 194 case OPEN_DELEGATE_READ:
195 195 acep = &res->delegation.open_delegation4_u.read.permissions;
196 196 break;
197 197 case OPEN_DELEGATE_WRITE:
198 198 acep = &res->delegation.open_delegation4_u.write.permissions;
199 199 break;
200 200 }
201 201
202 202 if (acep->who.utf8string_val) {
203 203 kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
204 204 acep->who.utf8string_val = NULL;
205 205 }
206 206 }
207 207
208 208 void
209 209 rfs4_free_reply(nfs_resop4 *rp)
210 210 {
211 211 switch (rp->resop) {
212 212 case OP_LOCK:
213 213 deep_lock_free(&rp->nfs_resop4_u.oplock);
214 214 break;
215 215 case OP_OPEN:
216 216 deep_open_free(&rp->nfs_resop4_u.opopen);
217 217 default:
218 218 break;
219 219 }
220 220 }
221 221
222 222 void
223 223 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
224 224 {
225 225 *dst = *src;
226 226
227 227 /* Handle responses that need deep copy */
228 228 switch (src->resop) {
229 229 case OP_LOCK:
230 230 deep_lock_copy(&dst->nfs_resop4_u.oplock,
231 231 &src->nfs_resop4_u.oplock);
232 232 break;
233 233 case OP_OPEN:
234 234 deep_open_copy(&dst->nfs_resop4_u.opopen,
235 235 &src->nfs_resop4_u.opopen);
236 236 break;
237 237 default:
238 238 break;
239 239 };
240 240 }
241 241
242 242 /*
243 243 * This is the implementation of the underlying state engine. The
244 244 * public interface to this engine is described by
245 245 * nfs4_state.h. Callers to the engine should hold no state engine
246 246 * locks when they call in to it. If the protocol needs to lock data
247 247 * structures it should do so after acquiring all references to them
248 248 * first and then follow the following lock order:
249 249 *
250 250 * client > openowner > state > lo_state > lockowner > file.
251 251 *
252 252 * Internally we only allow a thread to hold one hash bucket lock at a
253 253 * time and the lock is higher in the lock order (must be acquired
254 254 * first) than the data structure that is on that hash list.
255 255 *
256 256 * If a new reference was acquired by the caller, that reference needs
257 257 * to be released after releasing all acquired locks with the
258 258 * corresponding rfs4_*_rele routine.
259 259 */
260 260
261 261 /*
262 262 * This code is somewhat prototypical for now. Its purpose currently is to
263 263 * implement the interfaces sufficiently to finish the higher protocol
264 264 * elements. This will be replaced by dynamically resizable tables
265 265 * backed by kmem_cache allocator. However synchronization is handled
266 266 * correctly (I hope) and will not change by much. The mutexes for
267 267 * the hash buckets that can be used to create new instances of data
268 268 * structures might be good candidates to evolve into reader writer
269 269 * locks. If it has to do a creation, it would be holding the
270 270 * mutex across a kmem_alloc with KM_SLEEP specified.
271 271 */
272 272
273 273 #ifdef DEBUG
274 274 #define TABSIZE 17
275 275 #else
276 276 #define TABSIZE 2047
277 277 #endif
278 278
279 279 #define ADDRHASH(key) ((unsigned long)(key) >> 3)
280 280
281 281 #define MAXTABSZ 1024*1024
282 282
283 283 /* The values below are rfs4_lease_time units */
284 284
285 285 #ifdef DEBUG
286 286 #define CLIENT_CACHE_TIME 1
287 287 #define OPENOWNER_CACHE_TIME 1
288 288 #define STATE_CACHE_TIME 1
289 289 #define LO_STATE_CACHE_TIME 1
290 290 #define LOCKOWNER_CACHE_TIME 1
291 291 #define FILE_CACHE_TIME 3
292 292 #define DELEG_STATE_CACHE_TIME 1
293 293 #else
294 294 #define CLIENT_CACHE_TIME 10
295 295 #define OPENOWNER_CACHE_TIME 5
296 296 #define STATE_CACHE_TIME 1
297 297 #define LO_STATE_CACHE_TIME 1
298 298 #define LOCKOWNER_CACHE_TIME 3
299 299 #define FILE_CACHE_TIME 40
300 300 #define DELEG_STATE_CACHE_TIME 1
301 301 #endif
302 302
303 303 /*
304 304 * NFSv4 server state databases
305 305 *
306 306 * Initialized when the module is loaded and used by NFSv4 state tables.
307 307 * These kmem_cache databases are global, the tables that make use of these
308 308 * are per zone.
309 309 */
310 310 kmem_cache_t *rfs4_client_mem_cache;
311 311 kmem_cache_t *rfs4_clntIP_mem_cache;
312 312 kmem_cache_t *rfs4_openown_mem_cache;
313 313 kmem_cache_t *rfs4_openstID_mem_cache;
314 314 kmem_cache_t *rfs4_lockstID_mem_cache;
315 315 kmem_cache_t *rfs4_lockown_mem_cache;
316 316 kmem_cache_t *rfs4_file_mem_cache;
317 317 kmem_cache_t *rfs4_delegstID_mem_cache;
318 318
319 319 /*
320 320 * NFSv4 state table functions
321 321 */
322 322 static bool_t rfs4_client_create(rfs4_entry_t, void *);
323 323 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
324 324 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
325 325 static void rfs4_client_destroy(rfs4_entry_t);
326 326 static bool_t rfs4_client_expiry(rfs4_entry_t);
327 327 static uint32_t clientid_hash(void *);
328 328 static bool_t clientid_compare(rfs4_entry_t, void *);
329 329 static void *clientid_mkkey(rfs4_entry_t);
330 330 static uint32_t nfsclnt_hash(void *);
331 331 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
332 332 static void *nfsclnt_mkkey(rfs4_entry_t);
333 333 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
334 334 static void rfs4_clntip_destroy(rfs4_entry_t);
335 335 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
336 336 static uint32_t clntip_hash(void *);
337 337 static bool_t clntip_compare(rfs4_entry_t, void *);
338 338 static void *clntip_mkkey(rfs4_entry_t);
339 339 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
340 340 static void rfs4_openowner_destroy(rfs4_entry_t);
341 341 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
342 342 static uint32_t openowner_hash(void *);
343 343 static bool_t openowner_compare(rfs4_entry_t, void *);
344 344 static void *openowner_mkkey(rfs4_entry_t);
345 345 static bool_t rfs4_state_create(rfs4_entry_t, void *);
346 346 static void rfs4_state_destroy(rfs4_entry_t);
347 347 static bool_t rfs4_state_expiry(rfs4_entry_t);
348 348 static uint32_t state_hash(void *);
349 349 static bool_t state_compare(rfs4_entry_t, void *);
350 350 static void *state_mkkey(rfs4_entry_t);
351 351 static uint32_t state_owner_file_hash(void *);
352 352 static bool_t state_owner_file_compare(rfs4_entry_t, void *);
353 353 static void *state_owner_file_mkkey(rfs4_entry_t);
354 354 static uint32_t state_file_hash(void *);
355 355 static bool_t state_file_compare(rfs4_entry_t, void *);
356 356 static void *state_file_mkkey(rfs4_entry_t);
357 357 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
358 358 static void rfs4_lo_state_destroy(rfs4_entry_t);
359 359 static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
360 360 static uint32_t lo_state_hash(void *);
361 361 static bool_t lo_state_compare(rfs4_entry_t, void *);
362 362 static void *lo_state_mkkey(rfs4_entry_t);
363 363 static uint32_t lo_state_lo_hash(void *);
364 364 static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
365 365 static void *lo_state_lo_mkkey(rfs4_entry_t);
366 366 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
367 367 static void rfs4_lockowner_destroy(rfs4_entry_t);
368 368 static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
369 369 static uint32_t lockowner_hash(void *);
370 370 static bool_t lockowner_compare(rfs4_entry_t, void *);
371 371 static void *lockowner_mkkey(rfs4_entry_t);
372 372 static uint32_t pid_hash(void *);
373 373 static bool_t pid_compare(rfs4_entry_t, void *);
374 374 static void *pid_mkkey(rfs4_entry_t);
375 375 static bool_t rfs4_file_create(rfs4_entry_t, void *);
376 376 static void rfs4_file_destroy(rfs4_entry_t);
377 377 static uint32_t file_hash(void *);
378 378 static bool_t file_compare(rfs4_entry_t, void *);
379 379 static void *file_mkkey(rfs4_entry_t);
380 380 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
381 381 static void rfs4_deleg_state_destroy(rfs4_entry_t);
382 382 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
383 383 static uint32_t deleg_hash(void *);
384 384 static bool_t deleg_compare(rfs4_entry_t, void *);
385 385 static void *deleg_mkkey(rfs4_entry_t);
386 386 static uint32_t deleg_state_hash(void *);
387 387 static bool_t deleg_state_compare(rfs4_entry_t, void *);
388 388 static void *deleg_state_mkkey(rfs4_entry_t);
389 389
390 390 static void rfs4_state_rele_nounlock(rfs4_state_t *);
391 391
392 392 static int rfs4_ss_enabled = 0;
393 393
394 394 extern void (*rfs4_client_clrst)(struct nfs4clrst_args *);
395 395
396 396 void
397 397 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
398 398 {
399 399 kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
400 400 }
401 401
402 402 static rfs4_ss_pn_t *
403 403 rfs4_ss_pnalloc(char *dir, char *leaf)
404 404 {
405 405 rfs4_ss_pn_t *ss_pn;
406 406 int dir_len, leaf_len;
407 407
408 408 /*
409 409 * validate we have a resonable path
410 410 * (account for the '/' and trailing null)
411 411 */
412 412 if ((dir_len = strlen(dir)) > MAXPATHLEN ||
413 413 (leaf_len = strlen(leaf)) > MAXNAMELEN ||
414 414 (dir_len + leaf_len + 2) > MAXPATHLEN) {
415 415 return (NULL);
416 416 }
417 417
418 418 ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
419 419
420 420 (void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
421 421 /* Handy pointer to just the leaf name */
422 422 ss_pn->leaf = ss_pn->pn + dir_len + 1;
423 423 return (ss_pn);
424 424 }
425 425
426 426
427 427 /*
428 428 * Move the "leaf" filename from "sdir" directory
429 429 * to the "ddir" directory. Return the pathname of
430 430 * the destination unless the rename fails in which
431 431 * case we need to return the source pathname.
432 432 */
433 433 static rfs4_ss_pn_t *
434 434 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
435 435 {
436 436 rfs4_ss_pn_t *src, *dst;
437 437
438 438 if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
439 439 return (NULL);
440 440
441 441 if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
442 442 rfs4_ss_pnfree(src);
443 443 return (NULL);
444 444 }
445 445
446 446 /*
447 447 * If the rename fails we shall return the src
448 448 * pathname and free the dst. Otherwise we need
449 449 * to free the src and return the dst pathanme.
450 450 */
451 451 if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
452 452 rfs4_ss_pnfree(dst);
453 453 return (src);
454 454 }
455 455 rfs4_ss_pnfree(src);
456 456 return (dst);
457 457 }
458 458
459 459
460 460 static rfs4_oldstate_t *
461 461 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
462 462 {
463 463 struct uio uio;
464 464 struct iovec iov[3];
465 465
466 466 rfs4_oldstate_t *cl_ss = NULL;
467 467 vnode_t *vp;
468 468 vattr_t va;
469 469 uint_t id_len;
470 470 int err, kill_file, file_vers;
471 471
472 472 if (ss_pn == NULL)
473 473 return (NULL);
474 474
475 475 /*
476 476 * open the state file.
477 477 */
478 478 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
479 479 return (NULL);
480 480 }
481 481
482 482 if (vp->v_type != VREG) {
483 483 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
484 484 VN_RELE(vp);
485 485 return (NULL);
486 486 }
487 487
488 488 err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
489 489 if (err) {
490 490 /*
491 491 * We don't have read access? better get the heck out.
492 492 */
493 493 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
494 494 VN_RELE(vp);
495 495 return (NULL);
496 496 }
497 497
498 498 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
499 499 /*
500 500 * get the file size to do some basic validation
501 501 */
502 502 va.va_mask = AT_SIZE;
503 503 err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
504 504
505 505 kill_file = (va.va_size == 0 || va.va_size <
506 506 (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
507 507
508 508 if (err || kill_file) {
509 509 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
510 510 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
511 511 VN_RELE(vp);
512 512 if (kill_file) {
513 513 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
514 514 }
515 515 return (NULL);
516 516 }
517 517
518 518 cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
519 519
520 520 /*
521 521 * build iovecs to read in the file_version, verifier and id_len
522 522 */
523 523 iov[0].iov_base = (caddr_t)&file_vers;
524 524 iov[0].iov_len = sizeof (int);
525 525 iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
526 526 iov[1].iov_len = NFS4_VERIFIER_SIZE;
527 527 iov[2].iov_base = (caddr_t)&id_len;
528 528 iov[2].iov_len = sizeof (uint_t);
529 529
530 530 uio.uio_iov = iov;
531 531 uio.uio_iovcnt = 3;
532 532 uio.uio_segflg = UIO_SYSSPACE;
533 533 uio.uio_loffset = 0;
534 534 uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
535 535
536 536 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
537 537 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
538 538 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
539 539 VN_RELE(vp);
540 540 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
541 541 return (NULL);
542 542 }
543 543
544 544 /*
545 545 * if the file_version doesn't match or if the
546 546 * id_len is zero or the combination of the verifier,
547 547 * id_len and id_val is bigger than the file we have
548 548 * a problem. If so ditch the file.
549 549 */
550 550 kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
551 551 (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
552 552
553 553 if (err || kill_file) {
554 554 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
555 555 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
556 556 VN_RELE(vp);
557 557 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
558 558 if (kill_file) {
559 559 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
560 560 }
561 561 return (NULL);
562 562 }
563 563
564 564 /*
565 565 * now get the client id value
566 566 */
567 567 cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
568 568 iov[0].iov_base = cl_ss->cl_id4.id_val;
569 569 iov[0].iov_len = id_len;
570 570
571 571 uio.uio_iov = iov;
572 572 uio.uio_iovcnt = 1;
573 573 uio.uio_segflg = UIO_SYSSPACE;
574 574 uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
575 575
576 576 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
577 577 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
578 578 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
579 579 VN_RELE(vp);
580 580 kmem_free(cl_ss->cl_id4.id_val, id_len);
581 581 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
582 582 return (NULL);
583 583 }
584 584
585 585 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
586 586 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
587 587 VN_RELE(vp);
588 588 return (cl_ss);
589 589 }
590 590
591 591 #ifdef nextdp
592 592 #undef nextdp
593 593 #endif
594 594 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
595 595
596 596 /*
597 597 * Add entries from statedir to supplied oldstate list.
598 598 * Optionally, move all entries from statedir -> destdir.
599 599 */
600 600 void
601 601 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
602 602 {
603 603 rfs4_ss_pn_t *ss_pn;
604 604 rfs4_oldstate_t *cl_ss = NULL;
605 605 char *dirt = NULL;
606 606 int err, dir_eof = 0, size = 0;
607 607 vnode_t *dvp;
608 608 struct iovec iov;
609 609 struct uio uio;
610 610 struct dirent64 *dep;
611 611 offset_t dirchunk_offset = 0;
612 612
613 613 /*
614 614 * open the state directory
615 615 */
616 616 if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
617 617 return;
618 618
619 619 if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
620 620 goto out;
621 621
622 622 dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
623 623
624 624 /*
625 625 * Get and process the directory entries
626 626 */
627 627 while (!dir_eof) {
628 628 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
629 629 iov.iov_base = dirt;
630 630 iov.iov_len = RFS4_SS_DIRSIZE;
631 631 uio.uio_iov = &iov;
632 632 uio.uio_iovcnt = 1;
633 633 uio.uio_segflg = UIO_SYSSPACE;
634 634 uio.uio_loffset = dirchunk_offset;
635 635 uio.uio_resid = RFS4_SS_DIRSIZE;
636 636
637 637 err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
638 638 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
639 639 if (err)
640 640 goto out;
641 641
642 642 size = RFS4_SS_DIRSIZE - uio.uio_resid;
643 643
644 644 /*
645 645 * Process all the directory entries in this
646 646 * readdir chunk
647 647 */
648 648 for (dep = (struct dirent64 *)dirt; size > 0;
649 649 dep = nextdp(dep)) {
650 650
651 651 size -= dep->d_reclen;
652 652 dirchunk_offset = dep->d_off;
653 653
654 654 /*
655 655 * Skip '.' and '..'
656 656 */
657 657 if (NFS_IS_DOTNAME(dep->d_name))
658 658 continue;
659 659
660 660 ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
661 661 if (ss_pn == NULL)
662 662 continue;
663 663
664 664 if (cl_ss = rfs4_ss_getstate(dvp, ss_pn)) {
665 665 if (destdir != NULL) {
666 666 rfs4_ss_pnfree(ss_pn);
667 667 cl_ss->ss_pn = rfs4_ss_movestate(
668 668 statedir, destdir, dep->d_name);
669 669 } else {
670 670 cl_ss->ss_pn = ss_pn;
671 671 }
672 672 insque(cl_ss, oldstate);
673 673 } else {
674 674 rfs4_ss_pnfree(ss_pn);
675 675 }
676 676 }
677 677 }
678 678
679 679 out:
680 680 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
681 681 VN_RELE(dvp);
682 682 if (dirt)
683 683 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
684 684 }
685 685
686 686 static void
687 687 rfs4_ss_init(nfs4_srv_t *nsrv4)
688 688 {
689 689 int npaths = 1;
690 690 char *default_dss_path = NFS4_DSS_VAR_DIR;
691 691
692 692 /* read the default stable storage state */
693 693 rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
694 694
695 695 rfs4_ss_enabled = 1;
696 696 }
697 697
698 698 static void
699 699 rfs4_ss_fini(nfs4_srv_t *nsrv4)
700 700 {
701 701 rfs4_servinst_t *sip;
702 702
703 703 mutex_enter(&nsrv4->servinst_lock);
704 704 sip = nsrv4->nfs4_cur_servinst;
705 705 while (sip != NULL) {
706 706 rfs4_dss_clear_oldstate(sip);
707 707 sip = sip->next;
708 708 }
709 709 mutex_exit(&nsrv4->servinst_lock);
710 710 }
711 711
712 712 /*
713 713 * Remove all oldstate files referenced by this servinst.
714 714 */
715 715 static void
716 716 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
717 717 {
718 718 rfs4_oldstate_t *os_head, *osp;
719 719
720 720 rw_enter(&sip->oldstate_lock, RW_WRITER);
721 721 os_head = sip->oldstate;
722 722
723 723 if (os_head == NULL) {
724 724 rw_exit(&sip->oldstate_lock);
725 725 return;
726 726 }
727 727
728 728 /* skip dummy entry */
729 729 osp = os_head->next;
730 730 while (osp != os_head) {
731 731 char *leaf = osp->ss_pn->leaf;
732 732 rfs4_oldstate_t *os_next;
733 733
734 734 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
735 735
736 736 if (osp->cl_id4.id_val)
737 737 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
738 738 rfs4_ss_pnfree(osp->ss_pn);
739 739
740 740 os_next = osp->next;
741 741 remque(osp);
742 742 kmem_free(osp, sizeof (rfs4_oldstate_t));
743 743 osp = os_next;
744 744 }
745 745
746 746 rw_exit(&sip->oldstate_lock);
747 747 }
748 748
749 749 /*
750 750 * Form the state and oldstate paths, and read in the stable storage files.
751 751 */
752 752 void
753 753 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
754 754 {
755 755 int i;
756 756 char *state, *oldstate;
757 757
758 758 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
759 759 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
760 760
761 761 for (i = 0; i < npaths; i++) {
762 762 char *path = paths[i];
763 763
764 764 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
765 765 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
766 766
767 767 /*
768 768 * Populate the current server instance's oldstate list.
769 769 *
770 770 * 1. Read stable storage data from old state directory,
771 771 * leaving its contents alone.
772 772 *
773 773 * 2. Read stable storage data from state directory,
774 774 * and move the latter's contents to old state
775 775 * directory.
776 776 */
777 777 /* CSTYLED */
778 778 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
779 779 /* CSTYLED */
780 780 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
781 781 }
782 782
783 783 kmem_free(state, MAXPATHLEN);
784 784 kmem_free(oldstate, MAXPATHLEN);
785 785 }
786 786
787 787
788 788 /*
789 789 * Check if we are still in grace and if the client can be
790 790 * granted permission to perform reclaims.
791 791 */
792 792 void
793 793 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
794 794 {
795 795 rfs4_servinst_t *sip;
796 796
797 797 /*
798 798 * It should be sufficient to check the oldstate data for just
799 799 * this client's instance. However, since our per-instance
800 800 * client grouping is solely temporal, HA-NFSv4 RG failover
801 801 * might result in clients of the same RG being partitioned into
802 802 * separate instances.
803 803 *
804 804 * Until the client grouping is improved, we must check the
805 805 * oldstate data for all instances with an active grace period.
806 806 *
807 807 * This also serves as the mechanism to remove stale oldstate data.
808 808 * The first time we check an instance after its grace period has
809 809 * expired, the oldstate data should be cleared.
810 810 *
811 811 * Start at the current instance, and walk the list backwards
812 812 * to the first.
813 813 */
814 814 mutex_enter(&nsrv4->servinst_lock);
815 815 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
816 816 rfs4_ss_chkclid_sip(cp, sip);
817 817
818 818 /* if the above check found this client, we're done */
819 819 if (cp->rc_can_reclaim)
820 820 break;
821 821 }
822 822 mutex_exit(&nsrv4->servinst_lock);
823 823 }
824 824
825 825 static void
826 826 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
827 827 {
828 828 rfs4_oldstate_t *osp, *os_head;
829 829
830 830 /* short circuit everything if this server instance has no oldstate */
831 831 rw_enter(&sip->oldstate_lock, RW_READER);
832 832 os_head = sip->oldstate;
833 833 rw_exit(&sip->oldstate_lock);
834 834 if (os_head == NULL)
835 835 return;
836 836
837 837 /*
838 838 * If this server instance is no longer in a grace period then
839 839 * the client won't be able to reclaim. No further need for this
840 840 * instance's oldstate data, so it can be cleared.
841 841 */
842 842 if (!rfs4_servinst_in_grace(sip))
843 843 return;
844 844
845 845 /* this instance is still in grace; search for the clientid */
846 846
847 847 rw_enter(&sip->oldstate_lock, RW_READER);
848 848
849 849 os_head = sip->oldstate;
850 850 /* skip dummy entry */
851 851 osp = os_head->next;
852 852 while (osp != os_head) {
853 853 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
854 854 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
855 855 osp->cl_id4.id_len) == 0) {
856 856 cp->rc_can_reclaim = 1;
857 857 break;
858 858 }
859 859 }
860 860 osp = osp->next;
861 861 }
862 862
863 863 rw_exit(&sip->oldstate_lock);
864 864 }
865 865
866 866 /*
867 867 * Place client information into stable storage: 1/3.
868 868 * First, generate the leaf filename, from the client's IP address and
869 869 * the server-generated short-hand clientid.
870 870 */
871 871 void
872 872 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
873 873 {
874 874 const char *kinet_ntop6(uchar_t *, char *, size_t);
875 875 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
876 876 struct sockaddr *ca;
877 877 uchar_t *b;
878 878
879 879 if (rfs4_ss_enabled == 0) {
880 880 return;
881 881 }
882 882
883 883 buf[0] = 0;
884 884
885 885 ca = (struct sockaddr *)&cp->rc_addr;
886 886
887 887 /*
888 888 * Convert the caller's IP address to a dotted string
889 889 */
890 890 if (ca->sa_family == AF_INET) {
891 891 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
892 892 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
893 893 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
894 894 } else if (ca->sa_family == AF_INET6) {
895 895 struct sockaddr_in6 *sin6;
896 896
897 897 sin6 = (struct sockaddr_in6 *)ca;
898 898 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
899 899 buf, INET6_ADDRSTRLEN);
900 900 }
901 901
902 902 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
903 903 (longlong_t)cp->rc_clientid);
904 904 rfs4_ss_clid_write(nsrv4, cp, leaf);
905 905 }
906 906
907 907 /*
908 908 * Place client information into stable storage: 2/3.
909 909 * DSS: distributed stable storage: the file may need to be written to
910 910 * multiple directories.
911 911 */
912 912 static void
913 913 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
914 914 {
915 915 rfs4_servinst_t *sip;
916 916
917 917 /*
918 918 * It should be sufficient to write the leaf file to (all) DSS paths
919 919 * associated with just this client's instance. However, since our
920 920 * per-instance client grouping is solely temporal, HA-NFSv4 RG
921 921 * failover might result in us losing DSS data.
922 922 *
923 923 * Until the client grouping is improved, we must write the DSS data
924 924 * to all instances' paths. Start at the current instance, and
925 925 * walk the list backwards to the first.
926 926 */
927 927 mutex_enter(&nsrv4->servinst_lock);
928 928 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
929 929 int i, npaths = sip->dss_npaths;
930 930
931 931 /* write the leaf file to all DSS paths */
932 932 for (i = 0; i < npaths; i++) {
933 933 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
934 934
935 935 /* HA-NFSv4 path might have been failed-away from us */
936 936 if (dss_path == NULL)
937 937 continue;
938 938
939 939 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
940 940 }
941 941 }
942 942 mutex_exit(&nsrv4->servinst_lock);
943 943 }
944 944
945 945 /*
946 946 * Place client information into stable storage: 3/3.
947 947 * Write the stable storage data to the requested file.
948 948 */
949 949 static void
950 950 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
951 951 {
952 952 int ioflag;
953 953 int file_vers = NFS4_SS_VERSION;
954 954 size_t dirlen;
955 955 struct uio uio;
956 956 struct iovec iov[4];
957 957 char *dir;
958 958 rfs4_ss_pn_t *ss_pn;
959 959 vnode_t *vp;
960 960 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
961 961
962 962 /* allow 2 extra bytes for '/' & NUL */
963 963 dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
964 964 dir = kmem_alloc(dirlen, KM_SLEEP);
965 965 (void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
966 966
967 967 ss_pn = rfs4_ss_pnalloc(dir, leaf);
968 968 /* rfs4_ss_pnalloc takes its own copy */
969 969 kmem_free(dir, dirlen);
970 970 if (ss_pn == NULL)
971 971 return;
972 972
973 973 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
974 974 CRCREAT, 0)) {
975 975 rfs4_ss_pnfree(ss_pn);
976 976 return;
977 977 }
978 978
979 979 /*
980 980 * We need to record leaf - i.e. the filename - so that we know
981 981 * what to remove, in the future. However, the dir part of cp->ss_pn
982 982 * should never be referenced directly, since it's potentially only
983 983 * one of several paths with this leaf in it.
984 984 */
985 985 if (cp->rc_ss_pn != NULL) {
986 986 if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
987 987 /* we've already recorded *this* leaf */
988 988 rfs4_ss_pnfree(ss_pn);
989 989 } else {
990 990 /* replace with this leaf */
991 991 rfs4_ss_pnfree(cp->rc_ss_pn);
992 992 cp->rc_ss_pn = ss_pn;
993 993 }
994 994 } else {
995 995 cp->rc_ss_pn = ss_pn;
996 996 }
997 997
998 998 /*
999 999 * Build a scatter list that points to the nfs_client_id4
1000 1000 */
1001 1001 iov[0].iov_base = (caddr_t)&file_vers;
1002 1002 iov[0].iov_len = sizeof (int);
1003 1003 iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1004 1004 iov[1].iov_len = NFS4_VERIFIER_SIZE;
1005 1005 iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1006 1006 iov[2].iov_len = sizeof (uint_t);
1007 1007 iov[3].iov_base = (caddr_t)cl_id4->id_val;
1008 1008 iov[3].iov_len = cl_id4->id_len;
1009 1009
1010 1010 uio.uio_iov = iov;
1011 1011 uio.uio_iovcnt = 4;
1012 1012 uio.uio_loffset = 0;
1013 1013 uio.uio_segflg = UIO_SYSSPACE;
1014 1014 uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1015 1015 uio.uio_resid = cl_id4->id_len + sizeof (int) +
1016 1016 NFS4_VERIFIER_SIZE + sizeof (uint_t);
1017 1017
1018 1018 ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1019 1019 uio.uio_extflg = UIO_COPY_DEFAULT;
1020 1020
1021 1021 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1022 1022 /* write the full client id to the file. */
1023 1023 (void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1024 1024 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1025 1025
1026 1026 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1027 1027 VN_RELE(vp);
1028 1028 }
1029 1029
1030 1030 /*
1031 1031 * DSS: distributed stable storage.
1032 1032 * Unpack the list of paths passed by nfsd.
1033 1033 * Use nvlist_alloc(9F) to manage the data.
1034 1034 * The caller is responsible for allocating and freeing the buffer.
1035 1035 */
1036 1036 int
1037 1037 rfs4_dss_setpaths(char *buf, size_t buflen)
1038 1038 {
1039 1039 int error;
1040 1040
1041 1041 /*
1042 1042 * If this is a "warm start", i.e. we previously had DSS paths,
1043 1043 * preserve the old paths.
1044 1044 */
1045 1045 if (rfs4_dss_paths != NULL) {
1046 1046 /*
1047 1047 * Before we lose the ptr, destroy the nvlist and pathnames
1048 1048 * array from the warm start before this one.
1049 1049 */
1050 1050 nvlist_free(rfs4_dss_oldpaths);
1051 1051 rfs4_dss_oldpaths = rfs4_dss_paths;
1052 1052 }
1053 1053
1054 1054 /* unpack the buffer into a searchable nvlist */
1055 1055 error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1056 1056 if (error)
1057 1057 return (error);
1058 1058
1059 1059 /*
1060 1060 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1061 1061 * in the list, and record its location.
1062 1062 */
1063 1063 error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1064 1064 &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1065 1065 return (error);
1066 1066 }
1067 1067
1068 1068 /*
1069 1069 * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1070 1070 * to find and mark the client for forced expire.
1071 1071 */
1072 1072 static void
1073 1073 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1074 1074 {
1075 1075 rfs4_client_t *cp = (rfs4_client_t *)ent;
1076 1076 struct nfs4clrst_args *clr = arg;
1077 1077 struct sockaddr_in6 *ent_sin6;
1078 1078 struct in6_addr clr_in6;
1079 1079 struct sockaddr_in *ent_sin;
1080 1080 struct in_addr clr_in;
1081 1081
1082 1082 if (clr->addr_type != cp->rc_addr.ss_family) {
1083 1083 return;
1084 1084 }
1085 1085
1086 1086 switch (clr->addr_type) {
1087 1087
1088 1088 case AF_INET6:
1089 1089 /* copyin the address from user space */
1090 1090 if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1091 1091 break;
1092 1092 }
1093 1093
1094 1094 ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1095 1095
1096 1096 /*
1097 1097 * now compare, and if equivalent mark entry
1098 1098 * for forced expiration
1099 1099 */
1100 1100 if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1101 1101 cp->rc_forced_expire = 1;
1102 1102 }
1103 1103 break;
1104 1104
1105 1105 case AF_INET:
1106 1106 /* copyin the address from user space */
1107 1107 if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1108 1108 break;
1109 1109 }
1110 1110
1111 1111 ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1112 1112
1113 1113 /*
1114 1114 * now compare, and if equivalent mark entry
1115 1115 * for forced expiration
1116 1116 */
1117 1117 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1118 1118 cp->rc_forced_expire = 1;
1119 1119 }
1120 1120 break;
1121 1121
1122 1122 default:
1123 1123 /* force this assert to fail */
1124 1124 ASSERT(clr->addr_type != clr->addr_type);
1125 1125 }
1126 1126 }
1127 1127
1128 1128 /*
1129 1129 * This is called from nfssys() in order to clear server state
1130 1130 * for the specified client IP Address.
1131 1131 */
1132 1132 void
1133 1133 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1134 1134 {
1135 1135 nfs4_srv_t *nsrv4;
1136 1136 nsrv4 = nfs4_get_srv();
1137 1137 (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1138 1138 }
1139 1139
1140 1140 /*
1141 1141 * Used to initialize the NFSv4 server's state or database. All of
1142 1142 * the tables are created and timers are set.
1143 1143 */
1144 1144 void
1145 1145 rfs4_state_g_init()
1146 1146 {
1147 1147 extern boolean_t rfs4_cpr_callb(void *, int);
1148 1148 /*
1149 1149 * Add a CPR callback so that we can update client
1150 1150 * access times to extend the lease after a suspend
1151 1151 * and resume (using the same class as rpcmod/connmgr)
1152 1152 */
1153 1153 cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1154 1154
1155 1155 /*
1156 1156 * NFSv4 server state databases
1157 1157 *
1158 1158 * Initilized when the module is loaded and used by NFSv4 state tables.
1159 1159 * These kmem_cache free pools are used globally, the NFSv4 state
1160 1160 * tables which make use of these kmem_cache free pools are per zone.
1161 1161 *
1162 1162 * initialize the global kmem_cache free pools which will be used by
1163 1163 * the NFSv4 state tables.
1164 1164 */
1165 1165 /* CSTYLED */
1166 1166 rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
1167 1167 /* CSTYLED */
1168 1168 rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
1169 1169 /* CSTYLED */
1170 1170 rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
1171 1171 /* CSTYLED */
1172 1172 rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
1173 1173 /* CSTYLED */
1174 1174 rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
1175 1175 /* CSTYLED */
1176 1176 rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
1177 1177 /* CSTYLED */
1178 1178 rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
1179 1179 /* CSTYLED */
1180 1180 rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1181 1181
1182 1182 rfs4_client_clrst = rfs4_clear_client_state;
1183 1183 }
1184 1184
1185 1185
1186 1186 /*
1187 1187 * Used at server shutdown to cleanup all of the NFSv4 server's structures
1188 1188 * and other state.
1189 1189 */
1190 1190 void
1191 1191 rfs4_state_g_fini()
1192 1192 {
1193 1193 int i;
1194 1194 /*
1195 1195 * Cleanup the CPR callback.
1196 1196 */
1197 1197 if (cpr_id)
1198 1198 (void) callb_delete(cpr_id);
1199 1199
1200 1200 rfs4_client_clrst = NULL;
1201 1201
1202 1202 /* free the NFSv4 state databases */
1203 1203 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1204 1204 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1205 1205 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1206 1206 }
1207 1207
1208 1208 rfs4_client_mem_cache = NULL;
1209 1209 rfs4_clntIP_mem_cache = NULL;
1210 1210 rfs4_openown_mem_cache = NULL;
1211 1211 rfs4_openstID_mem_cache = NULL;
1212 1212 rfs4_lockstID_mem_cache = NULL;
1213 1213 rfs4_lockown_mem_cache = NULL;
1214 1214 rfs4_file_mem_cache = NULL;
1215 1215 rfs4_delegstID_mem_cache = NULL;
1216 1216
1217 1217 /* DSS: distributed stable storage */
1218 1218 nvlist_free(rfs4_dss_oldpaths);
1219 1219 nvlist_free(rfs4_dss_paths);
1220 1220 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1221 1221 }
1222 1222
1223 1223 /*
1224 1224 * Used to initialize the per zone NFSv4 server's state
1225 1225 */
1226 1226 void
1227 1227 rfs4_state_zone_init(nfs4_srv_t *nsrv4)
1228 1228 {
1229 1229 time_t start_time;
1230 1230 int start_grace;
1231 1231 char *dss_path = NFS4_DSS_VAR_DIR;
1232 1232
1233 1233 /* DSS: distributed stable storage: initialise served paths list */
1234 1234 nsrv4->dss_pathlist = NULL;
1235 1235
1236 1236 /*
1237 1237 * Set the boot time. If the server
1238 1238 * has been restarted quickly and has had the opportunity to
1239 1239 * service clients, then the start_time needs to be bumped
1240 1240 * regardless. A small window but it exists...
1241 1241 */
1242 1242 start_time = gethrestime_sec();
1243 1243 if (nsrv4->rfs4_start_time < start_time)
1244 1244 nsrv4->rfs4_start_time = start_time;
1245 1245 else
1246 1246 nsrv4->rfs4_start_time++;
1247 1247
1248 1248 /*
1249 1249 * Create the first server instance, or a new one if the server has
1250 1250 * been restarted; see above comments on rfs4_start_time. Don't
1251 1251 * start its grace period; that will be done later, to maximise the
1252 1252 * clients' recovery window.
1253 1253 */
1254 1254 start_grace = 0;
1255 1255 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
1256 1256 int i;
1257 1257 char **dss_allpaths = NULL;
1258 1258 dss_allpaths = kmem_alloc(sizeof (char *) * (rfs4_dss_numnewpaths + 1), KM_SLEEP);
1259 1259 /*
1260 1260 * Add the default path into the list of paths for saving
1261 1261 * state informantion.
1262 1262 */
1263 1263 dss_allpaths[0] = dss_path;
1264 1264 for ( i = 0; i < rfs4_dss_numnewpaths; i++) {
1265 1265 dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
1266 1266 }
1267 1267 rfs4_servinst_create(nsrv4, start_grace, (rfs4_dss_numnewpaths + 1), dss_allpaths);
1268 1268 kmem_free(dss_allpaths, (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
1269 1269 } else {
1270 1270 rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
1271 1271 }
1272 1272
1273 1273 /* reset the "first NFSv4 request" status */
1274 1274 nsrv4->seen_first_compound = 0;
1275 1275
1276 1276 mutex_enter(&nsrv4->state_lock);
1277 1277
1278 1278 /*
1279 1279 * If the server state database has already been initialized,
1280 1280 * skip it
1281 1281 */
1282 1282 if (nsrv4->nfs4_server_state != NULL) {
1283 1283 mutex_exit(&nsrv4->state_lock);
1284 1284 return;
1285 1285 }
1286 1286
1287 1287 rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
1288 1288
1289 1289 /* set the various cache timers for table creation */
1290 1290 if (nsrv4->rfs4_client_cache_time == 0)
1291 1291 nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
1292 1292 if (nsrv4->rfs4_openowner_cache_time == 0)
1293 1293 nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1294 1294 if (nsrv4->rfs4_state_cache_time == 0)
1295 1295 nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
1296 1296 if (nsrv4->rfs4_lo_state_cache_time == 0)
1297 1297 nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1298 1298 if (nsrv4->rfs4_lockowner_cache_time == 0)
1299 1299 nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1300 1300 if (nsrv4->rfs4_file_cache_time == 0)
1301 1301 nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
1302 1302 if (nsrv4->rfs4_deleg_state_cache_time == 0)
1303 1303 nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1304 1304
1305 1305 /* Create the overall database to hold all server state */
1306 1306 nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1307 1307
1308 1308 /* Now create the individual tables */
1309 1309 nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
1310 1310 nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1311 1311 "Client",
1312 1312 nsrv4->rfs4_client_cache_time,
1313 1313 2,
1314 1314 rfs4_client_create,
1315 1315 rfs4_client_destroy,
1316 1316 rfs4_client_expiry,
1317 1317 sizeof (rfs4_client_t),
1318 1318 TABSIZE,
1319 1319 MAXTABSZ/8, 100);
1320 1320 nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1321 1321 "nfs_client_id4", nfsclnt_hash,
1322 1322 nfsclnt_compare, nfsclnt_mkkey,
1323 1323 TRUE);
1324 1324 nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1325 1325 "client_id", clientid_hash,
1326 1326 clientid_compare, clientid_mkkey,
1327 1327 FALSE);
1328 1328
1329 1329 nsrv4->rfs4_clntip_cache_time = 86400 * 365; /* about a year */
1330 1330 nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1331 1331 "ClntIP",
1332 1332 nsrv4->rfs4_clntip_cache_time,
1333 1333 1,
1334 1334 rfs4_clntip_create,
1335 1335 rfs4_clntip_destroy,
1336 1336 rfs4_clntip_expiry,
1337 1337 sizeof (rfs4_clntip_t),
1338 1338 TABSIZE,
1339 1339 MAXTABSZ, 100);
1340 1340 nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1341 1341 "client_ip", clntip_hash,
1342 1342 clntip_compare, clntip_mkkey,
1343 1343 TRUE);
1344 1344
1345 1345 nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
1346 1346 nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1347 1347 "OpenOwner",
1348 1348 nsrv4->rfs4_openowner_cache_time,
1349 1349 1,
1350 1350 rfs4_openowner_create,
1351 1351 rfs4_openowner_destroy,
1352 1352 rfs4_openowner_expiry,
1353 1353 sizeof (rfs4_openowner_t),
1354 1354 TABSIZE,
1355 1355 MAXTABSZ, 100);
1356 1356 nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1357 1357 "open_owner4", openowner_hash,
1358 1358 openowner_compare,
1359 1359 openowner_mkkey, TRUE);
1360 1360
1361 1361 nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
1362 1362 nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1363 1363 "OpenStateID",
1364 1364 nsrv4->rfs4_state_cache_time,
1365 1365 3,
1366 1366 rfs4_state_create,
1367 1367 rfs4_state_destroy,
1368 1368 rfs4_state_expiry,
1369 1369 sizeof (rfs4_state_t),
1370 1370 TABSIZE,
1371 1371 MAXTABSZ, 100);
1372 1372
1373 1373 /* CSTYLED */
1374 1374 nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1375 1375 "Openowner-File",
1376 1376 state_owner_file_hash,
1377 1377 state_owner_file_compare,
1378 1378 state_owner_file_mkkey, TRUE);
1379 1379
1380 1380 nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1381 1381 "State-id", state_hash,
1382 1382 state_compare, state_mkkey, FALSE);
1383 1383
1384 1384 nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1385 1385 "File", state_file_hash,
1386 1386 state_file_compare, state_file_mkkey,
1387 1387 FALSE);
1388 1388
1389 1389 nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
1390 1390 nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1391 1391 "LockStateID",
1392 1392 nsrv4->rfs4_lo_state_cache_time,
1393 1393 2,
1394 1394 rfs4_lo_state_create,
1395 1395 rfs4_lo_state_destroy,
1396 1396 rfs4_lo_state_expiry,
1397 1397 sizeof (rfs4_lo_state_t),
1398 1398 TABSIZE,
1399 1399 MAXTABSZ, 100);
1400 1400
1401 1401 /* CSTYLED */
1402 1402 nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1403 1403 "lockownerxstate",
1404 1404 lo_state_lo_hash,
1405 1405 lo_state_lo_compare,
1406 1406 lo_state_lo_mkkey, TRUE);
1407 1407
1408 1408 nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1409 1409 "State-id",
1410 1410 lo_state_hash, lo_state_compare,
1411 1411 lo_state_mkkey, FALSE);
1412 1412
1413 1413 nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1414 1414
1415 1415 nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1416 1416 "Lockowner",
1417 1417 nsrv4->rfs4_lockowner_cache_time,
1418 1418 2,
1419 1419 rfs4_lockowner_create,
1420 1420 rfs4_lockowner_destroy,
1421 1421 rfs4_lockowner_expiry,
1422 1422 sizeof (rfs4_lockowner_t),
1423 1423 TABSIZE,
1424 1424 MAXTABSZ, 100);
1425 1425
1426 1426 nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1427 1427 "lock_owner4", lockowner_hash,
1428 1428 lockowner_compare,
1429 1429 lockowner_mkkey, TRUE);
1430 1430
1431 1431 /* CSTYLED */
1432 1432 nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1433 1433 "pid", pid_hash,
1434 1434 pid_compare, pid_mkkey,
1435 1435 FALSE);
1436 1436
1437 1437 nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
1438 1438 nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1439 1439 "File",
1440 1440 nsrv4->rfs4_file_cache_time,
1441 1441 1,
1442 1442 rfs4_file_create,
1443 1443 rfs4_file_destroy,
1444 1444 NULL,
1445 1445 sizeof (rfs4_file_t),
1446 1446 TABSIZE,
1447 1447 MAXTABSZ, -1);
1448 1448
1449 1449 nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1450 1450 "Filehandle", file_hash,
1451 1451 file_compare, file_mkkey, TRUE);
1452 1452
1453 1453 nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
1454 1454 /* CSTYLED */
1455 1455 nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1456 1456 "DelegStateID",
1457 1457 nsrv4->rfs4_deleg_state_cache_time,
1458 1458 2,
1459 1459 rfs4_deleg_state_create,
1460 1460 rfs4_deleg_state_destroy,
1461 1461 rfs4_deleg_state_expiry,
1462 1462 sizeof (rfs4_deleg_state_t),
1463 1463 TABSIZE,
1464 1464 MAXTABSZ, 100);
1465 1465 nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1466 1466 "DelegByFileClient",
1467 1467 deleg_hash,
1468 1468 deleg_compare,
1469 1469 deleg_mkkey, TRUE);
1470 1470
1471 1471 /* CSTYLED */
1472 1472 nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1473 1473 "DelegState",
1474 1474 deleg_state_hash,
1475 1475 deleg_state_compare,
1476 1476 deleg_state_mkkey, FALSE);
1477 1477
1478 1478 mutex_exit(&nsrv4->state_lock);
1479 1479
1480 1480 /*
1481 1481 * Init the stable storage.
1482 1482 */
1483 1483 rfs4_ss_init(nsrv4);
1484 1484 }
1485 1485
1486 1486 /*
1487 1487 * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1488 1488 * and state.
|
↓ open down ↓ |
1488 lines elided |
↑ open up ↑ |
1489 1489 */
1490 1490 void
1491 1491 rfs4_state_zone_fini()
1492 1492 {
1493 1493 rfs4_database_t *dbp;
1494 1494 nfs4_srv_t *nsrv4;
1495 1495 nsrv4 = nfs4_get_srv();
1496 1496
1497 1497 rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1498 1498
1499 + /*
1500 + * Clean up any dangling stable storage structures BEFORE calling
1501 + * rfs4_servinst_destroy_all() so there are no dangling structures
1502 + * (i.e. the srvinsts are all cleared of danglers BEFORE they get
1503 + * freed).
1504 + */
1505 + rfs4_ss_fini(nsrv4);
1506 +
1499 1507 mutex_enter(&nsrv4->state_lock);
1500 1508
1501 1509 if (nsrv4->nfs4_server_state == NULL) {
1502 1510 mutex_exit(&nsrv4->state_lock);
1503 1511 return;
1504 1512 }
1505 1513
1506 1514 /* destroy server instances and current instance ptr */
1507 1515 rfs4_servinst_destroy_all(nsrv4);
1508 1516
1509 1517 /* reset the "first NFSv4 request" status */
|
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
1510 1518 nsrv4->seen_first_compound = 0;
1511 1519
1512 1520 dbp = nsrv4->nfs4_server_state;
1513 1521 nsrv4->nfs4_server_state = NULL;
1514 1522
1515 1523 rw_destroy(&nsrv4->rfs4_findclient_lock);
1516 1524
1517 1525 /* First stop all of the reaper threads in the database */
1518 1526 rfs4_database_shutdown(dbp);
1519 1527 /*
1520 - * XXX workaround
1521 - * Skip destrying the state database yet just in case there
1522 - * are unfinished operations depending on it.
1528 + * WARNING: There may be consumers of the rfs4 database still
1529 + * active as we destroy these. IF that's the case, consider putting
1530 + * some of their _zone_fini()-like functions into the zsd key as
1531 + * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions. We can
1532 + * maintain some ordering guarantees better that way.
1523 1533 */
1524 1534 /* Now destroy/release the database tables */
1525 - /* rfs4_database_destroy(dbp); */
1535 + rfs4_database_destroy(dbp);
1526 1536
1527 1537 /* Reset the cache timers for next time */
1528 1538 nsrv4->rfs4_client_cache_time = 0;
1529 1539 nsrv4->rfs4_openowner_cache_time = 0;
1530 1540 nsrv4->rfs4_state_cache_time = 0;
1531 1541 nsrv4->rfs4_lo_state_cache_time = 0;
1532 1542 nsrv4->rfs4_lockowner_cache_time = 0;
1533 1543 nsrv4->rfs4_file_cache_time = 0;
1534 1544 nsrv4->rfs4_deleg_state_cache_time = 0;
1535 1545
1536 1546 mutex_exit(&nsrv4->state_lock);
1537 -
1538 - /* clean up any dangling stable storage structures */
1539 - rfs4_ss_fini(nsrv4);
1540 1547 }
1541 1548
1542 1549 typedef union {
1543 1550 struct {
1544 1551 uint32_t start_time;
1545 1552 uint32_t c_id;
1546 1553 } impl_id;
1547 1554 clientid4 id4;
1548 1555 } cid;
1549 1556
1550 1557 static int foreign_stateid(stateid_t *id);
1551 1558 static int foreign_clientid(cid *cidp);
1552 1559 static void embed_nodeid(cid *cidp);
1553 1560
1554 1561 typedef union {
1555 1562 struct {
1556 1563 uint32_t c_id;
1557 1564 uint32_t gen_num;
1558 1565 } cv_impl;
1559 1566 verifier4 confirm_verf;
1560 1567 } scid_confirm_verf;
1561 1568
1562 1569 static uint32_t
1563 1570 clientid_hash(void *key)
1564 1571 {
1565 1572 cid *idp = key;
1566 1573
1567 1574 return (idp->impl_id.c_id);
1568 1575 }
1569 1576
1570 1577 static bool_t
1571 1578 clientid_compare(rfs4_entry_t entry, void *key)
1572 1579 {
1573 1580 rfs4_client_t *cp = (rfs4_client_t *)entry;
1574 1581 clientid4 *idp = key;
1575 1582
1576 1583 return (*idp == cp->rc_clientid);
1577 1584 }
1578 1585
1579 1586 static void *
1580 1587 clientid_mkkey(rfs4_entry_t entry)
1581 1588 {
1582 1589 rfs4_client_t *cp = (rfs4_client_t *)entry;
1583 1590
1584 1591 return (&cp->rc_clientid);
1585 1592 }
1586 1593
1587 1594 static uint32_t
1588 1595 nfsclnt_hash(void *key)
1589 1596 {
1590 1597 nfs_client_id4 *client = key;
1591 1598 int i;
1592 1599 uint32_t hash = 0;
1593 1600
1594 1601 for (i = 0; i < client->id_len; i++) {
1595 1602 hash <<= 1;
1596 1603 hash += (uint_t)client->id_val[i];
1597 1604 }
1598 1605 return (hash);
1599 1606 }
1600 1607
1601 1608
1602 1609 static bool_t
1603 1610 nfsclnt_compare(rfs4_entry_t entry, void *key)
1604 1611 {
1605 1612 rfs4_client_t *cp = (rfs4_client_t *)entry;
1606 1613 nfs_client_id4 *nfs_client = key;
1607 1614
1608 1615 if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1609 1616 return (FALSE);
1610 1617
1611 1618 return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1612 1619 nfs_client->id_len) == 0);
1613 1620 }
1614 1621
1615 1622 static void *
1616 1623 nfsclnt_mkkey(rfs4_entry_t entry)
1617 1624 {
1618 1625 rfs4_client_t *cp = (rfs4_client_t *)entry;
1619 1626
1620 1627 return (&cp->rc_nfs_client);
1621 1628 }
1622 1629
1623 1630 static bool_t
1624 1631 rfs4_client_expiry(rfs4_entry_t u_entry)
1625 1632 {
1626 1633 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1627 1634 bool_t cp_expired;
1628 1635
1629 1636 if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1630 1637 cp->rc_ss_remove = 1;
1631 1638 return (TRUE);
1632 1639 }
1633 1640 /*
1634 1641 * If the sysadmin has used clear_locks for this
1635 1642 * entry then forced_expire will be set and we
1636 1643 * want this entry to be reaped. Or the entry
1637 1644 * has exceeded its lease period.
1638 1645 */
1639 1646 cp_expired = (cp->rc_forced_expire ||
1640 1647 (gethrestime_sec() - cp->rc_last_access
1641 1648 > rfs4_lease_time));
1642 1649
1643 1650 if (!cp->rc_ss_remove && cp_expired)
1644 1651 cp->rc_ss_remove = 1;
1645 1652 return (cp_expired);
1646 1653 }
1647 1654
1648 1655 /*
1649 1656 * Remove the leaf file from all distributed stable storage paths.
1650 1657 */
1651 1658 static void
1652 1659 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1653 1660 {
1654 1661 nfs4_srv_t *nsrv4;
1655 1662 rfs4_servinst_t *sip;
1656 1663 char *leaf = cp->rc_ss_pn->leaf;
1657 1664
1658 1665 /*
1659 1666 * since the state files are written to all DSS
1660 1667 * paths we must remove this leaf file instance
1661 1668 * from all server instances.
1662 1669 */
1663 1670
1664 1671 nsrv4 = nfs4_get_srv();
1665 1672 mutex_enter(&nsrv4->servinst_lock);
1666 1673 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1667 1674 /* remove the leaf file associated with this server instance */
1668 1675 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1669 1676 }
1670 1677 mutex_exit(&nsrv4->servinst_lock);
1671 1678 }
1672 1679
1673 1680 static void
1674 1681 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1675 1682 {
1676 1683 int i, npaths = sip->dss_npaths;
1677 1684
1678 1685 for (i = 0; i < npaths; i++) {
1679 1686 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1680 1687 char *path, *dir;
1681 1688 size_t pathlen;
1682 1689
1683 1690 /* the HA-NFSv4 path might have been failed-over away from us */
1684 1691 if (dss_path == NULL)
1685 1692 continue;
1686 1693
1687 1694 dir = dss_path->path;
1688 1695
1689 1696 /* allow 3 extra bytes for two '/' & a NUL */
1690 1697 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1691 1698 path = kmem_alloc(pathlen, KM_SLEEP);
1692 1699 (void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1693 1700
1694 1701 (void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1695 1702
1696 1703 kmem_free(path, pathlen);
1697 1704 }
1698 1705 }
1699 1706
1700 1707 static void
1701 1708 rfs4_client_destroy(rfs4_entry_t u_entry)
1702 1709 {
1703 1710 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1704 1711
1705 1712 mutex_destroy(cp->rc_cbinfo.cb_lock);
1706 1713 cv_destroy(cp->rc_cbinfo.cb_cv);
1707 1714 cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1708 1715 list_destroy(&cp->rc_openownerlist);
1709 1716
1710 1717 /* free callback info */
1711 1718 rfs4_cbinfo_free(&cp->rc_cbinfo);
1712 1719
1713 1720 if (cp->rc_cp_confirmed)
1714 1721 rfs4_client_rele(cp->rc_cp_confirmed);
1715 1722
1716 1723 if (cp->rc_ss_pn) {
1717 1724 /* check if the stable storage files need to be removed */
1718 1725 if (cp->rc_ss_remove)
1719 1726 rfs4_dss_remove_cpleaf(cp);
1720 1727 rfs4_ss_pnfree(cp->rc_ss_pn);
1721 1728 }
1722 1729
1723 1730 /* Free the client supplied client id */
1724 1731 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1725 1732
1726 1733 if (cp->rc_sysidt != LM_NOSYSID)
1727 1734 lm_free_sysidt(cp->rc_sysidt);
1728 1735 }
1729 1736
1730 1737 static bool_t
1731 1738 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1732 1739 {
1733 1740 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1734 1741 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1735 1742 struct sockaddr *ca;
1736 1743 cid *cidp;
1737 1744 scid_confirm_verf *scvp;
1738 1745 nfs4_srv_t *nsrv4;
1739 1746
1740 1747 nsrv4 = nfs4_get_srv();
1741 1748
1742 1749 /* Get a clientid to give to the client */
1743 1750 cidp = (cid *)&cp->rc_clientid;
1744 1751 cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1745 1752 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1746 1753
1747 1754 /* If we are booted as a cluster node, embed our nodeid */
1748 1755 if (cluster_bootflags & CLUSTER_BOOTED)
1749 1756 embed_nodeid(cidp);
1750 1757
1751 1758 /* Allocate and copy client's client id value */
1752 1759 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1753 1760 cp->rc_nfs_client.id_len = client->id_len;
1754 1761 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1755 1762 cp->rc_nfs_client.verifier = client->verifier;
1756 1763
1757 1764 /* Copy client's IP address */
1758 1765 ca = client->cl_addr;
1759 1766 if (ca->sa_family == AF_INET)
1760 1767 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1761 1768 else if (ca->sa_family == AF_INET6)
1762 1769 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1763 1770 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1764 1771
1765 1772 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1766 1773 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1767 1774 scvp->cv_impl.c_id = cidp->impl_id.c_id;
1768 1775 scvp->cv_impl.gen_num = 0;
1769 1776
1770 1777 /* An F_UNLKSYS has been done for this client */
1771 1778 cp->rc_unlksys_completed = FALSE;
1772 1779
1773 1780 /* We need the client to ack us */
1774 1781 cp->rc_need_confirm = TRUE;
1775 1782 cp->rc_cp_confirmed = NULL;
1776 1783
1777 1784 /* TRUE all the time until the callback path actually fails */
1778 1785 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1779 1786
1780 1787 /* Initialize the access time to now */
1781 1788 cp->rc_last_access = gethrestime_sec();
1782 1789
1783 1790 cp->rc_cr_set = NULL;
1784 1791
1785 1792 cp->rc_sysidt = LM_NOSYSID;
1786 1793
1787 1794 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1788 1795 offsetof(rfs4_openowner_t, ro_node));
1789 1796
1790 1797 /* set up the callback control structure */
1791 1798 cp->rc_cbinfo.cb_state = CB_UNINIT;
1792 1799 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1793 1800 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1794 1801 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1795 1802
1796 1803 /*
1797 1804 * Associate the client_t with the current server instance.
1798 1805 * The hold is solely to satisfy the calling requirement of
1799 1806 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1800 1807 */
1801 1808 rfs4_dbe_hold(cp->rc_dbe);
1802 1809 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1803 1810 rfs4_dbe_rele(cp->rc_dbe);
1804 1811
1805 1812 return (TRUE);
1806 1813 }
1807 1814
1808 1815 /*
1809 1816 * Caller wants to generate/update the setclientid_confirm verifier
1810 1817 * associated with a client. This is done during the SETCLIENTID
1811 1818 * processing.
1812 1819 */
1813 1820 void
1814 1821 rfs4_client_scv_next(rfs4_client_t *cp)
1815 1822 {
1816 1823 scid_confirm_verf *scvp;
1817 1824
1818 1825 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1819 1826 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1820 1827 scvp->cv_impl.gen_num++;
1821 1828 }
1822 1829
1823 1830 void
1824 1831 rfs4_client_rele(rfs4_client_t *cp)
1825 1832 {
1826 1833 rfs4_dbe_rele(cp->rc_dbe);
1827 1834 }
1828 1835
1829 1836 rfs4_client_t *
1830 1837 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1831 1838 {
1832 1839 rfs4_client_t *cp;
1833 1840 nfs4_srv_t *nsrv4;
1834 1841 nsrv4 = nfs4_get_srv();
1835 1842
1836 1843
1837 1844 if (oldcp) {
1838 1845 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1839 1846 rfs4_dbe_hide(oldcp->rc_dbe);
1840 1847 } else {
1841 1848 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1842 1849 }
1843 1850
1844 1851 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1845 1852 create, (void *)client, RFS4_DBS_VALID);
1846 1853
1847 1854 if (oldcp)
1848 1855 rfs4_dbe_unhide(oldcp->rc_dbe);
1849 1856
1850 1857 rw_exit(&nsrv4->rfs4_findclient_lock);
1851 1858
1852 1859 return (cp);
1853 1860 }
1854 1861
1855 1862 rfs4_client_t *
1856 1863 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1857 1864 {
1858 1865 rfs4_client_t *cp;
1859 1866 bool_t create = FALSE;
1860 1867 cid *cidp = (cid *)&clientid;
1861 1868 nfs4_srv_t *nsrv4 = nfs4_get_srv();
1862 1869
1863 1870 /* If we're a cluster and the nodeid isn't right, short-circuit */
1864 1871 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1865 1872 return (NULL);
1866 1873
1867 1874 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1868 1875
1869 1876 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1870 1877 &create, NULL, RFS4_DBS_VALID);
1871 1878
1872 1879 rw_exit(&nsrv4->rfs4_findclient_lock);
1873 1880
1874 1881 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1875 1882 rfs4_client_rele(cp);
1876 1883 return (NULL);
1877 1884 } else {
1878 1885 return (cp);
1879 1886 }
1880 1887 }
1881 1888
/*
 * Hash a client IP address.  key points at a struct sockaddr; only the
 * address bytes of AF_INET and AF_INET6 addresses contribute to the hash.
 * Any other address family hashes to 0.
 */
static uint32_t
clntip_hash(void *key)
{
	struct sockaddr *sa = key;
	uint32_t hash = 0;
	char *bytes;
	int nbytes;
	int idx;

	switch (sa->sa_family) {
	case AF_INET:
		bytes = (char *)&((struct sockaddr_in *)sa)->sin_addr;
		nbytes = sizeof (struct in_addr);
		break;
	case AF_INET6:
		bytes = (char *)&((struct sockaddr_in6 *)sa)->sin6_addr;
		nbytes = sizeof (struct in6_addr);
		break;
	default:
		return (0);
	}

	/* Shift-and-add over each byte of the address */
	for (idx = 0; idx < nbytes; idx++)
		hash = (hash << 1) + (uint32_t)bytes[idx];

	return (hash);
}
1907 1914
1908 1915 static bool_t
1909 1916 clntip_compare(rfs4_entry_t entry, void *key)
1910 1917 {
1911 1918 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1912 1919 struct sockaddr *addr = key;
1913 1920 int len = 0;
1914 1921 char *p1, *p2;
1915 1922
1916 1923 if (addr->sa_family == AF_INET) {
1917 1924 struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
1918 1925 struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
1919 1926 len = sizeof (struct in_addr);
1920 1927 p1 = (char *)&a1->sin_addr;
1921 1928 p2 = (char *)&a2->sin_addr;
1922 1929 } else if (addr->sa_family == AF_INET6) {
1923 1930 struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
1924 1931 struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
1925 1932 len = sizeof (struct in6_addr);
1926 1933 p1 = (char *)&a1->sin6_addr;
1927 1934 p2 = (char *)&a2->sin6_addr;
1928 1935 } else
1929 1936 return (0);
1930 1937
1931 1938 return (bcmp(p1, p2, len) == 0);
1932 1939 }
1933 1940
1934 1941 static void *
1935 1942 clntip_mkkey(rfs4_entry_t entry)
1936 1943 {
1937 1944 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1938 1945
1939 1946 return (&cp->ri_addr);
1940 1947 }
1941 1948
1942 1949 static bool_t
1943 1950 rfs4_clntip_expiry(rfs4_entry_t u_entry)
1944 1951 {
1945 1952 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1946 1953
1947 1954 if (rfs4_dbe_is_invalid(cp->ri_dbe))
1948 1955 return (TRUE);
1949 1956 return (FALSE);
1950 1957 }
1951 1958
1952 1959 /* ARGSUSED */
1953 1960 static void
1954 1961 rfs4_clntip_destroy(rfs4_entry_t u_entry)
1955 1962 {
1956 1963 }
1957 1964
1958 1965 static bool_t
1959 1966 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
1960 1967 {
1961 1968 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1962 1969 struct sockaddr *ca = (struct sockaddr *)arg;
1963 1970
1964 1971 /* Copy client's IP address */
1965 1972 if (ca->sa_family == AF_INET)
1966 1973 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1967 1974 else if (ca->sa_family == AF_INET6)
1968 1975 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1969 1976 else
1970 1977 return (FALSE);
1971 1978 cp->ri_no_referrals = 1;
1972 1979
1973 1980 return (TRUE);
1974 1981 }
1975 1982
1976 1983 rfs4_clntip_t *
1977 1984 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1978 1985 {
1979 1986 rfs4_clntip_t *cp;
1980 1987 nfs4_srv_t *nsrv4;
1981 1988
1982 1989 nsrv4 = nfs4_get_srv();
1983 1990
1984 1991 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1985 1992
1986 1993 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
1987 1994 create, addr, RFS4_DBS_VALID);
1988 1995
1989 1996 rw_exit(&nsrv4->rfs4_findclient_lock);
1990 1997
1991 1998 return (cp);
1992 1999 }
1993 2000
1994 2001 void
1995 2002 rfs4_invalidate_clntip(struct sockaddr *addr)
1996 2003 {
1997 2004 rfs4_clntip_t *cp;
1998 2005 bool_t create = FALSE;
1999 2006 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2000 2007
2001 2008 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2002 2009
2003 2010 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2004 2011 &create, NULL, RFS4_DBS_VALID);
2005 2012 if (cp == NULL) {
2006 2013 rw_exit(&nsrv4->rfs4_findclient_lock);
2007 2014 return;
2008 2015 }
2009 2016 rfs4_dbe_invalidate(cp->ri_dbe);
2010 2017 rfs4_dbe_rele(cp->ri_dbe);
2011 2018
2012 2019 rw_exit(&nsrv4->rfs4_findclient_lock);
2013 2020 }
2014 2021
2015 2022 bool_t
2016 2023 rfs4_lease_expired(rfs4_client_t *cp)
2017 2024 {
2018 2025 bool_t rc;
2019 2026
2020 2027 rfs4_dbe_lock(cp->rc_dbe);
2021 2028
2022 2029 /*
2023 2030 * If the admin has executed clear_locks for this
2024 2031 * client id, force expire will be set, so no need
2025 2032 * to calculate anything because it's "outa here".
2026 2033 */
2027 2034 if (cp->rc_forced_expire) {
2028 2035 rc = TRUE;
2029 2036 } else {
2030 2037 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2031 2038 }
2032 2039
2033 2040 /*
2034 2041 * If the lease has expired we will also want
2035 2042 * to remove any stable storage state data. So
2036 2043 * mark the client id accordingly.
2037 2044 */
2038 2045 if (!cp->rc_ss_remove)
2039 2046 cp->rc_ss_remove = (rc == TRUE);
2040 2047
2041 2048 rfs4_dbe_unlock(cp->rc_dbe);
2042 2049
2043 2050 return (rc);
2044 2051 }
2045 2052
2046 2053 void
2047 2054 rfs4_update_lease(rfs4_client_t *cp)
2048 2055 {
2049 2056 rfs4_dbe_lock(cp->rc_dbe);
2050 2057 if (!cp->rc_forced_expire)
2051 2058 cp->rc_last_access = gethrestime_sec();
2052 2059 rfs4_dbe_unlock(cp->rc_dbe);
2053 2060 }
2054 2061
2055 2062
2056 2063 static bool_t
2057 2064 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
2058 2065 {
2059 2066 bool_t rc;
2060 2067
2061 2068 if (a->clientid != b->clientid)
2062 2069 return (FALSE);
2063 2070
2064 2071 if (a->owner_len != b->owner_len)
2065 2072 return (FALSE);
2066 2073
2067 2074 rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
2068 2075
2069 2076 return (rc);
2070 2077 }
2071 2078
2072 2079 static uint_t
2073 2080 openowner_hash(void *key)
2074 2081 {
2075 2082 int i;
2076 2083 open_owner4 *openowner = key;
2077 2084 uint_t hash = 0;
2078 2085
2079 2086 for (i = 0; i < openowner->owner_len; i++) {
2080 2087 hash <<= 4;
2081 2088 hash += (uint_t)openowner->owner_val[i];
2082 2089 }
2083 2090 hash += (uint_t)openowner->clientid;
2084 2091 hash |= (openowner->clientid >> 32);
2085 2092
2086 2093 return (hash);
2087 2094 }
2088 2095
2089 2096 static bool_t
2090 2097 openowner_compare(rfs4_entry_t u_entry, void *key)
2091 2098 {
2092 2099 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2093 2100 open_owner4 *arg = key;
2094 2101
2095 2102 return (EQOPENOWNER(&oo->ro_owner, arg));
2096 2103 }
2097 2104
2098 2105 void *
2099 2106 openowner_mkkey(rfs4_entry_t u_entry)
2100 2107 {
2101 2108 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2102 2109
2103 2110 return (&oo->ro_owner);
2104 2111 }
2105 2112
2106 2113 /* ARGSUSED */
2107 2114 static bool_t
2108 2115 rfs4_openowner_expiry(rfs4_entry_t u_entry)
2109 2116 {
2110 2117 /* openstateid held us and did all needed delay */
2111 2118 return (TRUE);
2112 2119 }
2113 2120
2114 2121 static void
2115 2122 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2116 2123 {
2117 2124 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2118 2125
2119 2126 /* Remove open owner from client's lists of open owners */
2120 2127 rfs4_dbe_lock(oo->ro_client->rc_dbe);
2121 2128 list_remove(&oo->ro_client->rc_openownerlist, oo);
2122 2129 rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2123 2130
2124 2131 /* One less reference to the client */
2125 2132 rfs4_client_rele(oo->ro_client);
2126 2133 oo->ro_client = NULL;
2127 2134
2128 2135 /* Free the last reply for this lock owner */
2129 2136 rfs4_free_reply(&oo->ro_reply);
2130 2137
2131 2138 if (oo->ro_reply_fh.nfs_fh4_val) {
2132 2139 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2133 2140 oo->ro_reply_fh.nfs_fh4_len);
2134 2141 oo->ro_reply_fh.nfs_fh4_val = NULL;
2135 2142 oo->ro_reply_fh.nfs_fh4_len = 0;
2136 2143 }
2137 2144
2138 2145 rfs4_sw_destroy(&oo->ro_sw);
2139 2146 list_destroy(&oo->ro_statelist);
2140 2147
2141 2148 /* Free the lock owner id */
2142 2149 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2143 2150 }
2144 2151
2145 2152 void
2146 2153 rfs4_openowner_rele(rfs4_openowner_t *oo)
2147 2154 {
2148 2155 rfs4_dbe_rele(oo->ro_dbe);
2149 2156 }
2150 2157
2151 2158 static bool_t
2152 2159 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2153 2160 {
2154 2161 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2155 2162 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2156 2163 open_owner4 *openowner = &argp->ro_owner;
2157 2164 seqid4 seqid = argp->ro_open_seqid;
2158 2165 rfs4_client_t *cp;
2159 2166 bool_t create = FALSE;
2160 2167 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2161 2168
2162 2169 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2163 2170
2164 2171 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2165 2172 &openowner->clientid,
2166 2173 &create, NULL, RFS4_DBS_VALID);
2167 2174
2168 2175 rw_exit(&nsrv4->rfs4_findclient_lock);
2169 2176
2170 2177 if (cp == NULL)
2171 2178 return (FALSE);
2172 2179
2173 2180 oo->ro_reply_fh.nfs_fh4_len = 0;
2174 2181 oo->ro_reply_fh.nfs_fh4_val = NULL;
2175 2182
2176 2183 oo->ro_owner.clientid = openowner->clientid;
2177 2184 oo->ro_owner.owner_val =
2178 2185 kmem_alloc(openowner->owner_len, KM_SLEEP);
2179 2186
2180 2187 bcopy(openowner->owner_val,
2181 2188 oo->ro_owner.owner_val, openowner->owner_len);
2182 2189
2183 2190 oo->ro_owner.owner_len = openowner->owner_len;
2184 2191
2185 2192 oo->ro_need_confirm = TRUE;
2186 2193
2187 2194 rfs4_sw_init(&oo->ro_sw);
2188 2195
2189 2196 oo->ro_open_seqid = seqid;
2190 2197 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2191 2198 oo->ro_client = cp;
2192 2199 oo->ro_cr_set = NULL;
2193 2200
2194 2201 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2195 2202 offsetof(rfs4_state_t, rs_node));
2196 2203
2197 2204 /* Insert openowner into client's open owner list */
2198 2205 rfs4_dbe_lock(cp->rc_dbe);
2199 2206 list_insert_tail(&cp->rc_openownerlist, oo);
2200 2207 rfs4_dbe_unlock(cp->rc_dbe);
2201 2208
2202 2209 return (TRUE);
2203 2210 }
2204 2211
2205 2212 rfs4_openowner_t *
2206 2213 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2207 2214 {
2208 2215 rfs4_openowner_t *oo;
2209 2216 rfs4_openowner_t arg;
2210 2217 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2211 2218
2212 2219 arg.ro_owner = *openowner;
2213 2220 arg.ro_open_seqid = seqid;
2214 2221 /* CSTYLED */
2215 2222 oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2216 2223 create, &arg, RFS4_DBS_VALID);
2217 2224
2218 2225 return (oo);
2219 2226 }
2220 2227
2221 2228 void
2222 2229 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2223 2230 {
2224 2231
2225 2232 rfs4_dbe_lock(oo->ro_dbe);
2226 2233
2227 2234 oo->ro_open_seqid++;
2228 2235
2229 2236 rfs4_dbe_unlock(oo->ro_dbe);
2230 2237 }
2231 2238
2232 2239 void
2233 2240 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2234 2241 {
2235 2242
2236 2243 rfs4_dbe_lock(oo->ro_dbe);
2237 2244
2238 2245 rfs4_free_reply(&oo->ro_reply);
2239 2246
2240 2247 rfs4_copy_reply(&oo->ro_reply, resp);
2241 2248
2242 2249 /* Save the filehandle if provided and free if not used */
2243 2250 if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2244 2251 fh && fh->nfs_fh4_len) {
2245 2252 if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2246 2253 oo->ro_reply_fh.nfs_fh4_val =
2247 2254 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2248 2255 nfs_fh4_copy(fh, &oo->ro_reply_fh);
2249 2256 } else {
2250 2257 if (oo->ro_reply_fh.nfs_fh4_val) {
2251 2258 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2252 2259 oo->ro_reply_fh.nfs_fh4_len);
2253 2260 oo->ro_reply_fh.nfs_fh4_val = NULL;
2254 2261 oo->ro_reply_fh.nfs_fh4_len = 0;
2255 2262 }
2256 2263 }
2257 2264
2258 2265 rfs4_dbe_unlock(oo->ro_dbe);
2259 2266 }
2260 2267
2261 2268 static bool_t
2262 2269 lockowner_compare(rfs4_entry_t u_entry, void *key)
2263 2270 {
2264 2271 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2265 2272 lock_owner4 *b = (lock_owner4 *)key;
2266 2273
2267 2274 if (lo->rl_owner.clientid != b->clientid)
2268 2275 return (FALSE);
2269 2276
2270 2277 if (lo->rl_owner.owner_len != b->owner_len)
2271 2278 return (FALSE);
2272 2279
2273 2280 return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2274 2281 lo->rl_owner.owner_len) == 0);
2275 2282 }
2276 2283
2277 2284 void *
2278 2285 lockowner_mkkey(rfs4_entry_t u_entry)
2279 2286 {
2280 2287 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2281 2288
2282 2289 return (&lo->rl_owner);
2283 2290 }
2284 2291
2285 2292 static uint32_t
2286 2293 lockowner_hash(void *key)
2287 2294 {
2288 2295 int i;
2289 2296 lock_owner4 *lockowner = key;
2290 2297 uint_t hash = 0;
2291 2298
2292 2299 for (i = 0; i < lockowner->owner_len; i++) {
2293 2300 hash <<= 4;
2294 2301 hash += (uint_t)lockowner->owner_val[i];
2295 2302 }
2296 2303 hash += (uint_t)lockowner->clientid;
2297 2304 hash |= (lockowner->clientid >> 32);
2298 2305
2299 2306 return (hash);
2300 2307 }
2301 2308
/*
 * Hash for the lock owner pid index.  The key is the pid value itself
 * carried in a pointer, so the hash is simply that value truncated to
 * 32 bits.
 */
static uint32_t
pid_hash(void *key)
{
	uintptr_t raw = (uintptr_t)key;

	return ((uint32_t)raw);
}
2307 2314
2308 2315 static void *
2309 2316 pid_mkkey(rfs4_entry_t u_entry)
2310 2317 {
2311 2318 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2312 2319
2313 2320 return ((void *)(uintptr_t)lo->rl_pid);
2314 2321 }
2315 2322
2316 2323 static bool_t
2317 2324 pid_compare(rfs4_entry_t u_entry, void *key)
2318 2325 {
2319 2326 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2320 2327
2321 2328 return (lo->rl_pid == (pid_t)(uintptr_t)key);
2322 2329 }
2323 2330
2324 2331 static void
2325 2332 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2326 2333 {
2327 2334 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2328 2335
2329 2336 /* Free the lock owner id */
2330 2337 kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2331 2338 rfs4_client_rele(lo->rl_client);
2332 2339 }
2333 2340
2334 2341 void
2335 2342 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2336 2343 {
2337 2344 rfs4_dbe_rele(lo->rl_dbe);
2338 2345 }
2339 2346
2340 2347 /* ARGSUSED */
2341 2348 static bool_t
2342 2349 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2343 2350 {
2344 2351 /*
2345 2352 * Since expiry is called with no other references on
2346 2353 * this struct, go ahead and have it removed.
2347 2354 */
2348 2355 return (TRUE);
2349 2356 }
2350 2357
2351 2358 static bool_t
2352 2359 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2353 2360 {
2354 2361 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2355 2362 lock_owner4 *lockowner = (lock_owner4 *)arg;
2356 2363 rfs4_client_t *cp;
2357 2364 bool_t create = FALSE;
2358 2365 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2359 2366
2360 2367 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2361 2368
2362 2369 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2363 2370 &lockowner->clientid,
2364 2371 &create, NULL, RFS4_DBS_VALID);
2365 2372
2366 2373 rw_exit(&nsrv4->rfs4_findclient_lock);
2367 2374
2368 2375 if (cp == NULL)
2369 2376 return (FALSE);
2370 2377
2371 2378 /* Reference client */
2372 2379 lo->rl_client = cp;
2373 2380 lo->rl_owner.clientid = lockowner->clientid;
2374 2381 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2375 2382 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2376 2383 lockowner->owner_len);
2377 2384 lo->rl_owner.owner_len = lockowner->owner_len;
2378 2385 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2379 2386
2380 2387 return (TRUE);
2381 2388 }
2382 2389
2383 2390 rfs4_lockowner_t *
2384 2391 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2385 2392 {
2386 2393 rfs4_lockowner_t *lo;
2387 2394 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2388 2395
2389 2396 /* CSTYLED */
2390 2397 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2391 2398 create, lockowner, RFS4_DBS_VALID);
2392 2399
2393 2400 return (lo);
2394 2401 }
2395 2402
2396 2403 rfs4_lockowner_t *
2397 2404 rfs4_findlockowner_by_pid(pid_t pid)
2398 2405 {
2399 2406 rfs4_lockowner_t *lo;
2400 2407 bool_t create = FALSE;
2401 2408 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2402 2409
2403 2410 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2404 2411 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2405 2412
2406 2413 return (lo);
2407 2414 }
2408 2415
2409 2416
2410 2417 static uint32_t
2411 2418 file_hash(void *key)
2412 2419 {
2413 2420 return (ADDRHASH(key));
2414 2421 }
2415 2422
2416 2423 static void *
2417 2424 file_mkkey(rfs4_entry_t u_entry)
2418 2425 {
2419 2426 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2420 2427
2421 2428 return (fp->rf_vp);
2422 2429 }
2423 2430
2424 2431 static bool_t
2425 2432 file_compare(rfs4_entry_t u_entry, void *key)
2426 2433 {
2427 2434 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2428 2435
2429 2436 return (fp->rf_vp == (vnode_t *)key);
2430 2437 }
2431 2438
2432 2439 static void
2433 2440 rfs4_file_destroy(rfs4_entry_t u_entry)
2434 2441 {
2435 2442 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2436 2443
2437 2444 list_destroy(&fp->rf_delegstatelist);
2438 2445
2439 2446 if (fp->rf_filehandle.nfs_fh4_val)
2440 2447 kmem_free(fp->rf_filehandle.nfs_fh4_val,
2441 2448 fp->rf_filehandle.nfs_fh4_len);
2442 2449 cv_destroy(fp->rf_dinfo.rd_recall_cv);
2443 2450 if (fp->rf_vp) {
2444 2451 vnode_t *vp = fp->rf_vp;
2445 2452
2446 2453 mutex_enter(&vp->v_vsd_lock);
2447 2454 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
2448 2455 mutex_exit(&vp->v_vsd_lock);
2449 2456 VN_RELE(vp);
2450 2457 fp->rf_vp = NULL;
2451 2458 }
2452 2459 rw_destroy(&fp->rf_file_rwlock);
2453 2460 }
2454 2461
2455 2462 /*
2456 2463 * Used to unlock the underlying dbe struct only
2457 2464 */
2458 2465 void
2459 2466 rfs4_file_rele(rfs4_file_t *fp)
2460 2467 {
2461 2468 rfs4_dbe_rele(fp->rf_dbe);
2462 2469 }
2463 2470
2464 2471 typedef struct {
2465 2472 vnode_t *vp;
2466 2473 nfs_fh4 *fh;
2467 2474 } rfs4_fcreate_arg;
2468 2475
2469 2476 static bool_t
2470 2477 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2471 2478 {
2472 2479 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2473 2480 rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2474 2481 vnode_t *vp = ap->vp;
2475 2482 nfs_fh4 *fh = ap->fh;
2476 2483
2477 2484 VN_HOLD(vp);
2478 2485
2479 2486 fp->rf_filehandle.nfs_fh4_len = 0;
2480 2487 fp->rf_filehandle.nfs_fh4_val = NULL;
2481 2488 ASSERT(fh && fh->nfs_fh4_len);
2482 2489 if (fh && fh->nfs_fh4_len) {
2483 2490 fp->rf_filehandle.nfs_fh4_val =
2484 2491 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2485 2492 nfs_fh4_copy(fh, &fp->rf_filehandle);
2486 2493 }
2487 2494 fp->rf_vp = vp;
2488 2495
2489 2496 list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2490 2497 offsetof(rfs4_deleg_state_t, rds_node));
2491 2498
2492 2499 fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2493 2500 fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2494 2501
2495 2502 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2496 2503 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2497 2504
2498 2505 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2499 2506
2500 2507 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2501 2508
2502 2509 mutex_enter(&vp->v_vsd_lock);
2503 2510 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2504 2511 mutex_exit(&vp->v_vsd_lock);
2505 2512
2506 2513 return (TRUE);
2507 2514 }
2508 2515
2509 2516 rfs4_file_t *
2510 2517 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2511 2518 {
2512 2519 rfs4_file_t *fp;
2513 2520 rfs4_fcreate_arg arg;
2514 2521 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2515 2522
2516 2523 arg.vp = vp;
2517 2524 arg.fh = fh;
2518 2525
2519 2526 if (*create == TRUE)
2520 2527 /* CSTYLED */
2521 2528 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2522 2529 &arg, RFS4_DBS_VALID);
2523 2530 else {
2524 2531 mutex_enter(&vp->v_vsd_lock);
2525 2532 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2526 2533 if (fp) {
2527 2534 rfs4_dbe_lock(fp->rf_dbe);
2528 2535 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2529 2536 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2530 2537 rfs4_dbe_unlock(fp->rf_dbe);
2531 2538 fp = NULL;
2532 2539 } else {
2533 2540 rfs4_dbe_hold(fp->rf_dbe);
2534 2541 rfs4_dbe_unlock(fp->rf_dbe);
2535 2542 }
2536 2543 }
2537 2544 mutex_exit(&vp->v_vsd_lock);
2538 2545 }
2539 2546 return (fp);
2540 2547 }
2541 2548
2542 2549 /*
2543 2550 * Find a file in the db and once it is located, take the rw lock.
2544 2551 * Need to check the vnode pointer and if it does not exist (it was
2545 2552 * removed between the db location and check) redo the find. This
2546 2553 * assumes that a file struct that has a NULL vnode pointer is marked
2547 2554 * at 'invalid' and will not be found in the db the second time
2548 2555 * around.
2549 2556 */
2550 2557 rfs4_file_t *
2551 2558 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2552 2559 {
2553 2560 rfs4_file_t *fp;
2554 2561 rfs4_fcreate_arg arg;
2555 2562 bool_t screate = *create;
2556 2563 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2557 2564
2558 2565 if (screate == FALSE) {
2559 2566 mutex_enter(&vp->v_vsd_lock);
2560 2567 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2561 2568 if (fp) {
2562 2569 rfs4_dbe_lock(fp->rf_dbe);
2563 2570 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2564 2571 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2565 2572 rfs4_dbe_unlock(fp->rf_dbe);
2566 2573 mutex_exit(&vp->v_vsd_lock);
2567 2574 fp = NULL;
2568 2575 } else {
2569 2576 rfs4_dbe_hold(fp->rf_dbe);
2570 2577 rfs4_dbe_unlock(fp->rf_dbe);
2571 2578 mutex_exit(&vp->v_vsd_lock);
2572 2579 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2573 2580 if (fp->rf_vp == NULL) {
2574 2581 rw_exit(&fp->rf_file_rwlock);
2575 2582 rfs4_file_rele(fp);
2576 2583 fp = NULL;
2577 2584 }
2578 2585 }
2579 2586 } else {
2580 2587 mutex_exit(&vp->v_vsd_lock);
2581 2588 }
2582 2589 } else {
2583 2590 retry:
2584 2591 arg.vp = vp;
2585 2592 arg.fh = fh;
2586 2593
2587 2594 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2588 2595 create, &arg, RFS4_DBS_VALID);
2589 2596 if (fp != NULL) {
2590 2597 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2591 2598 if (fp->rf_vp == NULL) {
2592 2599 rw_exit(&fp->rf_file_rwlock);
2593 2600 rfs4_file_rele(fp);
2594 2601 *create = screate;
2595 2602 goto retry;
2596 2603 }
2597 2604 }
2598 2605 }
2599 2606
2600 2607 return (fp);
2601 2608 }
2602 2609
2603 2610 static uint32_t
2604 2611 lo_state_hash(void *key)
2605 2612 {
2606 2613 stateid_t *id = key;
2607 2614
2608 2615 return (id->bits.ident+id->bits.pid);
2609 2616 }
2610 2617
2611 2618 static bool_t
2612 2619 lo_state_compare(rfs4_entry_t u_entry, void *key)
2613 2620 {
2614 2621 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2615 2622 stateid_t *id = key;
2616 2623 bool_t rc;
2617 2624
2618 2625 rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2619 2626 lsp->rls_lockid.bits.type == id->bits.type &&
2620 2627 lsp->rls_lockid.bits.ident == id->bits.ident &&
2621 2628 lsp->rls_lockid.bits.pid == id->bits.pid);
2622 2629
2623 2630 return (rc);
2624 2631 }
2625 2632
2626 2633 static void *
2627 2634 lo_state_mkkey(rfs4_entry_t u_entry)
2628 2635 {
2629 2636 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2630 2637
2631 2638 return (&lsp->rls_lockid);
2632 2639 }
2633 2640
2634 2641 static bool_t
2635 2642 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2636 2643 {
2637 2644 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2638 2645
2639 2646 if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2640 2647 return (TRUE);
2641 2648 if (lsp->rls_state->rs_closed)
2642 2649 return (TRUE);
2643 2650 return ((gethrestime_sec() -
2644 2651 lsp->rls_state->rs_owner->ro_client->rc_last_access
2645 2652 > rfs4_lease_time));
2646 2653 }
2647 2654
2648 2655 static void
2649 2656 rfs4_lo_state_destroy(rfs4_entry_t u_entry)
2650 2657 {
2651 2658 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2652 2659
2653 2660 rfs4_dbe_lock(lsp->rls_state->rs_dbe);
2654 2661 list_remove(&lsp->rls_state->rs_lostatelist, lsp);
2655 2662 rfs4_dbe_unlock(lsp->rls_state->rs_dbe);
2656 2663
2657 2664 rfs4_sw_destroy(&lsp->rls_sw);
2658 2665
2659 2666 /* Make sure to release the file locks */
2660 2667 if (lsp->rls_locks_cleaned == FALSE) {
2661 2668 lsp->rls_locks_cleaned = TRUE;
2662 2669 if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
2663 2670 /* Is the PxFS kernel module loaded? */
2664 2671 if (lm_remove_file_locks != NULL) {
2665 2672 int new_sysid;
2666 2673
2667 2674 /* Encode the cluster nodeid in new sysid */
2668 2675 new_sysid =
2669 2676 lsp->rls_locker->rl_client->rc_sysidt;
2670 2677 lm_set_nlmid_flk(&new_sysid);
2671 2678
2672 2679 /*
2673 2680 * This PxFS routine removes file locks for a
2674 2681 * client over all nodes of a cluster.
2675 2682 */
2676 2683 DTRACE_PROBE1(nfss_i_clust_rm_lck,
2677 2684 int, new_sysid);
2678 2685 (*lm_remove_file_locks)(new_sysid);
2679 2686 } else {
2680 2687 (void) cleanlocks(
2681 2688 lsp->rls_state->rs_finfo->rf_vp,
2682 2689 lsp->rls_locker->rl_pid,
2683 2690 lsp->rls_locker->rl_client->rc_sysidt);
2684 2691 }
2685 2692 }
2686 2693 }
2687 2694
2688 2695 /* Free the last reply for this state */
2689 2696 rfs4_free_reply(&lsp->rls_reply);
2690 2697
2691 2698 rfs4_lockowner_rele(lsp->rls_locker);
2692 2699 lsp->rls_locker = NULL;
2693 2700
2694 2701 rfs4_state_rele_nounlock(lsp->rls_state);
2695 2702 lsp->rls_state = NULL;
2696 2703 }
2697 2704
2698 2705 static bool_t
2699 2706 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2700 2707 {
2701 2708 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2702 2709 rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2703 2710 rfs4_lockowner_t *lo = argp->rls_locker;
2704 2711 rfs4_state_t *sp = argp->rls_state;
2705 2712
2706 2713 lsp->rls_state = sp;
2707 2714
2708 2715 lsp->rls_lockid = sp->rs_stateid;
2709 2716 lsp->rls_lockid.bits.type = LOCKID;
2710 2717 lsp->rls_lockid.bits.chgseq = 0;
2711 2718 lsp->rls_lockid.bits.pid = lo->rl_pid;
2712 2719
2713 2720 lsp->rls_locks_cleaned = FALSE;
2714 2721 lsp->rls_lock_completed = FALSE;
2715 2722
2716 2723 rfs4_sw_init(&lsp->rls_sw);
2717 2724
2718 2725 /* Attached the supplied lock owner */
2719 2726 rfs4_dbe_hold(lo->rl_dbe);
2720 2727 lsp->rls_locker = lo;
2721 2728
2722 2729 rfs4_dbe_lock(sp->rs_dbe);
2723 2730 list_insert_tail(&sp->rs_lostatelist, lsp);
2724 2731 rfs4_dbe_hold(sp->rs_dbe);
2725 2732 rfs4_dbe_unlock(sp->rs_dbe);
2726 2733
2727 2734 return (TRUE);
2728 2735 }
2729 2736
2730 2737 void
2731 2738 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2732 2739 {
2733 2740 if (unlock_fp == TRUE)
2734 2741 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2735 2742 rfs4_dbe_rele(lsp->rls_dbe);
2736 2743 }
2737 2744
2738 2745 static rfs4_lo_state_t *
2739 2746 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2740 2747 {
2741 2748 rfs4_lo_state_t *lsp;
2742 2749 bool_t create = FALSE;
2743 2750 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2744 2751
2745 2752 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2746 2753 &create, NULL, RFS4_DBS_VALID);
2747 2754 if (lock_fp == TRUE && lsp != NULL)
2748 2755 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2749 2756
2750 2757 return (lsp);
2751 2758 }
2752 2759
2753 2760
2754 2761 static uint32_t
2755 2762 lo_state_lo_hash(void *key)
2756 2763 {
2757 2764 rfs4_lo_state_t *lsp = key;
2758 2765
2759 2766 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2760 2767 }
2761 2768
2762 2769 static bool_t
2763 2770 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2764 2771 {
2765 2772 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2766 2773 rfs4_lo_state_t *keyp = key;
2767 2774
2768 2775 return (keyp->rls_locker == lsp->rls_locker &&
2769 2776 keyp->rls_state == lsp->rls_state);
2770 2777 }
2771 2778
2772 2779 static void *
2773 2780 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2774 2781 {
2775 2782 return (u_entry);
2776 2783 }
2777 2784
2778 2785 rfs4_lo_state_t *
2779 2786 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2780 2787 bool_t *create)
2781 2788 {
2782 2789 rfs4_lo_state_t *lsp;
2783 2790 rfs4_lo_state_t arg;
2784 2791 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2785 2792
2786 2793 arg.rls_locker = lo;
2787 2794 arg.rls_state = sp;
2788 2795
2789 2796 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2790 2797 &arg, create, &arg, RFS4_DBS_VALID);
2791 2798
2792 2799 return (lsp);
2793 2800 }
2794 2801
2795 2802 static stateid_t
2796 2803 get_stateid(id_t eid)
2797 2804 {
2798 2805 stateid_t id;
2799 2806 nfs4_srv_t *nsrv4;
2800 2807
2801 2808 nsrv4 = nfs4_get_srv();
2802 2809
2803 2810 id.bits.boottime = nsrv4->rfs4_start_time;
2804 2811 id.bits.ident = eid;
2805 2812 id.bits.chgseq = 0;
2806 2813 id.bits.type = 0;
2807 2814 id.bits.pid = 0;
2808 2815
2809 2816 /*
2810 2817 * If we are booted as a cluster node, embed our nodeid.
2811 2818 * We've already done sanity checks in rfs4_client_create() so no
2812 2819 * need to repeat them here.
2813 2820 */
2814 2821 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2815 2822 clconf_get_nodeid() : 0;
2816 2823
2817 2824 return (id);
2818 2825 }
2819 2826
2820 2827 /*
2821 2828 * For use only when booted as a cluster node.
2822 2829 * Returns TRUE if the embedded nodeid indicates that this stateid was
2823 2830 * generated on another node.
2824 2831 */
2825 2832 static int
2826 2833 foreign_stateid(stateid_t *id)
2827 2834 {
2828 2835 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2829 2836 return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2830 2837 }
2831 2838
2832 2839 /*
2833 2840 * For use only when booted as a cluster node.
2834 2841 * Returns TRUE if the embedded nodeid indicates that this clientid was
2835 2842 * generated on another node.
2836 2843 */
2837 2844 static int
2838 2845 foreign_clientid(cid *cidp)
2839 2846 {
2840 2847 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2841 2848 return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2842 2849 (uint32_t)clconf_get_nodeid());
2843 2850 }
2844 2851
2845 2852 /*
2846 2853 * For use only when booted as a cluster node.
2847 2854 * Embed our cluster nodeid into the clientid.
2848 2855 */
2849 2856 static void
2850 2857 embed_nodeid(cid *cidp)
2851 2858 {
2852 2859 int clnodeid;
2853 2860 /*
2854 2861 * Currently, our state tables are small enough that their
2855 2862 * ids will leave enough bits free for the nodeid. If the
2856 2863 * tables become larger, we mustn't overwrite the id.
2857 2864 * Equally, we only have room for so many bits of nodeid, so
2858 2865 * must check that too.
2859 2866 */
2860 2867 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2861 2868 ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2862 2869 clnodeid = clconf_get_nodeid();
2863 2870 ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2864 2871 ASSERT(clnodeid != NODEID_UNKNOWN);
2865 2872 cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2866 2873 }
2867 2874
2868 2875 static uint32_t
2869 2876 state_hash(void *key)
2870 2877 {
2871 2878 stateid_t *ip = (stateid_t *)key;
2872 2879
2873 2880 return (ip->bits.ident);
2874 2881 }
2875 2882
2876 2883 static bool_t
2877 2884 state_compare(rfs4_entry_t u_entry, void *key)
2878 2885 {
2879 2886 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2880 2887 stateid_t *id = (stateid_t *)key;
2881 2888 bool_t rc;
2882 2889
2883 2890 rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2884 2891 sp->rs_stateid.bits.ident == id->bits.ident);
2885 2892
2886 2893 return (rc);
2887 2894 }
2888 2895
2889 2896 static void *
2890 2897 state_mkkey(rfs4_entry_t u_entry)
2891 2898 {
2892 2899 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2893 2900
2894 2901 return (&sp->rs_stateid);
2895 2902 }
2896 2903
2897 2904 static void
2898 2905 rfs4_state_destroy(rfs4_entry_t u_entry)
2899 2906 {
2900 2907 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2901 2908
2902 2909 /* remove from openowner list */
2903 2910 rfs4_dbe_lock(sp->rs_owner->ro_dbe);
2904 2911 list_remove(&sp->rs_owner->ro_statelist, sp);
2905 2912 rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
2906 2913
2907 2914 list_destroy(&sp->rs_lostatelist);
2908 2915
2909 2916 /* release any share locks for this stateid if it's still open */
2910 2917 if (!sp->rs_closed) {
2911 2918 rfs4_dbe_lock(sp->rs_dbe);
2912 2919 (void) rfs4_unshare(sp);
2913 2920 rfs4_dbe_unlock(sp->rs_dbe);
2914 2921 }
2915 2922
2916 2923 /* Were done with the file */
2917 2924 rfs4_file_rele(sp->rs_finfo);
2918 2925 sp->rs_finfo = NULL;
2919 2926
2920 2927 /* And now with the openowner */
2921 2928 rfs4_openowner_rele(sp->rs_owner);
2922 2929 sp->rs_owner = NULL;
2923 2930 }
2924 2931
2925 2932 static void
2926 2933 rfs4_state_rele_nounlock(rfs4_state_t *sp)
2927 2934 {
2928 2935 rfs4_dbe_rele(sp->rs_dbe);
2929 2936 }
2930 2937
2931 2938 void
2932 2939 rfs4_state_rele(rfs4_state_t *sp)
2933 2940 {
2934 2941 rw_exit(&sp->rs_finfo->rf_file_rwlock);
2935 2942 rfs4_dbe_rele(sp->rs_dbe);
2936 2943 }
2937 2944
2938 2945 static uint32_t
2939 2946 deleg_hash(void *key)
2940 2947 {
2941 2948 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
2942 2949
2943 2950 return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
2944 2951 }
2945 2952
2946 2953 static bool_t
2947 2954 deleg_compare(rfs4_entry_t u_entry, void *key)
2948 2955 {
2949 2956 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2950 2957 rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
2951 2958
2952 2959 return (dsp->rds_client == kdsp->rds_client &&
2953 2960 dsp->rds_finfo == kdsp->rds_finfo);
2954 2961 }
2955 2962
2956 2963 static void *
2957 2964 deleg_mkkey(rfs4_entry_t u_entry)
2958 2965 {
2959 2966 return (u_entry);
2960 2967 }
2961 2968
2962 2969 static uint32_t
2963 2970 deleg_state_hash(void *key)
2964 2971 {
2965 2972 stateid_t *ip = (stateid_t *)key;
2966 2973
2967 2974 return (ip->bits.ident);
2968 2975 }
2969 2976
2970 2977 static bool_t
2971 2978 deleg_state_compare(rfs4_entry_t u_entry, void *key)
2972 2979 {
2973 2980 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2974 2981 stateid_t *id = (stateid_t *)key;
2975 2982 bool_t rc;
2976 2983
2977 2984 if (id->bits.type != DELEGID)
2978 2985 return (FALSE);
2979 2986
2980 2987 rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
2981 2988 dsp->rds_delegid.bits.ident == id->bits.ident);
2982 2989
2983 2990 return (rc);
2984 2991 }
2985 2992
2986 2993 static void *
2987 2994 deleg_state_mkkey(rfs4_entry_t u_entry)
2988 2995 {
2989 2996 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2990 2997
2991 2998 return (&dsp->rds_delegid);
2992 2999 }
2993 3000
2994 3001 static bool_t
2995 3002 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
2996 3003 {
2997 3004 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2998 3005
2999 3006 if (rfs4_dbe_is_invalid(dsp->rds_dbe))
3000 3007 return (TRUE);
3001 3008
3002 3009 if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
3003 3010 return (TRUE);
3004 3011
3005 3012 if ((gethrestime_sec() - dsp->rds_client->rc_last_access
3006 3013 > rfs4_lease_time)) {
3007 3014 rfs4_dbe_invalidate(dsp->rds_dbe);
3008 3015 return (TRUE);
3009 3016 }
3010 3017
3011 3018 return (FALSE);
3012 3019 }
3013 3020
3014 3021 static bool_t
3015 3022 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
3016 3023 {
3017 3024 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3018 3025 rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
3019 3026 rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
3020 3027
3021 3028 rfs4_dbe_hold(fp->rf_dbe);
3022 3029 rfs4_dbe_hold(cp->rc_dbe);
3023 3030
3024 3031 dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
3025 3032 dsp->rds_delegid.bits.type = DELEGID;
3026 3033 dsp->rds_finfo = fp;
3027 3034 dsp->rds_client = cp;
3028 3035 dsp->rds_dtype = OPEN_DELEGATE_NONE;
3029 3036
3030 3037 dsp->rds_time_granted = gethrestime_sec(); /* observability */
3031 3038 dsp->rds_time_revoked = 0;
3032 3039
3033 3040 list_link_init(&dsp->rds_node);
3034 3041
3035 3042 return (TRUE);
3036 3043 }
3037 3044
3038 3045 static void
3039 3046 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3040 3047 {
3041 3048 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3042 3049
3043 3050 /* return delegation if necessary */
3044 3051 rfs4_return_deleg(dsp, FALSE);
3045 3052
3046 3053 /* Were done with the file */
3047 3054 rfs4_file_rele(dsp->rds_finfo);
3048 3055 dsp->rds_finfo = NULL;
3049 3056
3050 3057 /* And now with the openowner */
3051 3058 rfs4_client_rele(dsp->rds_client);
3052 3059 dsp->rds_client = NULL;
3053 3060 }
3054 3061
3055 3062 rfs4_deleg_state_t *
3056 3063 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3057 3064 {
3058 3065 rfs4_deleg_state_t ds, *dsp;
3059 3066 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3060 3067
3061 3068 ds.rds_client = sp->rs_owner->ro_client;
3062 3069 ds.rds_finfo = sp->rs_finfo;
3063 3070
3064 3071 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3065 3072 create, &ds, RFS4_DBS_VALID);
3066 3073
3067 3074 return (dsp);
3068 3075 }
3069 3076
3070 3077 rfs4_deleg_state_t *
3071 3078 rfs4_finddelegstate(stateid_t *id)
3072 3079 {
3073 3080 rfs4_deleg_state_t *dsp;
3074 3081 bool_t create = FALSE;
3075 3082 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3076 3083
3077 3084 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3078 3085 id, &create, NULL, RFS4_DBS_VALID);
3079 3086
3080 3087 return (dsp);
3081 3088 }
3082 3089
3083 3090 void
3084 3091 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3085 3092 {
3086 3093 rfs4_dbe_rele(dsp->rds_dbe);
3087 3094 }
3088 3095
3089 3096 void
3090 3097 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3091 3098 {
3092 3099
3093 3100 rfs4_dbe_lock(lsp->rls_dbe);
3094 3101
3095 3102 /*
3096 3103 * If we are skipping sequence id checking, this means that
3097 3104 * this is the first lock request and therefore the sequence
3098 3105 * id does not need to be updated. This only happens on the
3099 3106 * first lock request for a lockowner
3100 3107 */
3101 3108 if (!lsp->rls_skip_seqid_check)
3102 3109 lsp->rls_seqid++;
3103 3110
3104 3111 rfs4_dbe_unlock(lsp->rls_dbe);
3105 3112 }
3106 3113
3107 3114 void
3108 3115 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3109 3116 {
3110 3117
3111 3118 rfs4_dbe_lock(lsp->rls_dbe);
3112 3119
3113 3120 rfs4_free_reply(&lsp->rls_reply);
3114 3121
3115 3122 rfs4_copy_reply(&lsp->rls_reply, resp);
3116 3123
3117 3124 rfs4_dbe_unlock(lsp->rls_dbe);
3118 3125 }
3119 3126
3120 3127 void
3121 3128 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3122 3129 bool_t close_of_client)
3123 3130 {
3124 3131 rfs4_state_t *sp;
3125 3132
3126 3133 rfs4_dbe_lock(oo->ro_dbe);
3127 3134
3128 3135 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3129 3136 sp = list_next(&oo->ro_statelist, sp)) {
3130 3137 rfs4_state_close(sp, FALSE, close_of_client, CRED());
3131 3138 if (invalidate == TRUE)
3132 3139 rfs4_dbe_invalidate(sp->rs_dbe);
3133 3140 }
3134 3141
3135 3142 rfs4_dbe_invalidate(oo->ro_dbe);
3136 3143 rfs4_dbe_unlock(oo->ro_dbe);
3137 3144 }
3138 3145
3139 3146 static uint32_t
3140 3147 state_owner_file_hash(void *key)
3141 3148 {
3142 3149 rfs4_state_t *sp = key;
3143 3150
3144 3151 return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3145 3152 }
3146 3153
3147 3154 static bool_t
3148 3155 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3149 3156 {
3150 3157 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3151 3158 rfs4_state_t *arg = key;
3152 3159
3153 3160 if (sp->rs_closed == TRUE)
3154 3161 return (FALSE);
3155 3162
3156 3163 return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3157 3164 }
3158 3165
3159 3166 static void *
3160 3167 state_owner_file_mkkey(rfs4_entry_t u_entry)
3161 3168 {
3162 3169 return (u_entry);
3163 3170 }
3164 3171
3165 3172 static uint32_t
3166 3173 state_file_hash(void *key)
3167 3174 {
3168 3175 return (ADDRHASH(key));
3169 3176 }
3170 3177
3171 3178 static bool_t
3172 3179 state_file_compare(rfs4_entry_t u_entry, void *key)
3173 3180 {
3174 3181 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3175 3182 rfs4_file_t *fp = key;
3176 3183
3177 3184 if (sp->rs_closed == TRUE)
3178 3185 return (FALSE);
3179 3186
3180 3187 return (fp == sp->rs_finfo);
3181 3188 }
3182 3189
3183 3190 static void *
3184 3191 state_file_mkkey(rfs4_entry_t u_entry)
3185 3192 {
3186 3193 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3187 3194
3188 3195 return (sp->rs_finfo);
3189 3196 }
3190 3197
3191 3198 rfs4_state_t *
3192 3199 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3193 3200 bool_t *create)
3194 3201 {
3195 3202 rfs4_state_t *sp;
3196 3203 rfs4_state_t key;
3197 3204 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3198 3205
3199 3206 key.rs_owner = oo;
3200 3207 key.rs_finfo = fp;
3201 3208
3202 3209 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3203 3210 &key, create, &key, RFS4_DBS_VALID);
3204 3211
3205 3212 return (sp);
3206 3213 }
3207 3214
3208 3215 /* This returns ANY state struct that refers to this file */
3209 3216 static rfs4_state_t *
3210 3217 rfs4_findstate_by_file(rfs4_file_t *fp)
3211 3218 {
3212 3219 bool_t create = FALSE;
3213 3220 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3214 3221
3215 3222 return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3216 3223 &create, fp, RFS4_DBS_VALID));
3217 3224 }
3218 3225
3219 3226 static bool_t
3220 3227 rfs4_state_expiry(rfs4_entry_t u_entry)
3221 3228 {
3222 3229 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3223 3230
3224 3231 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3225 3232 return (TRUE);
3226 3233
3227 3234 if (sp->rs_closed == TRUE &&
3228 3235 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3229 3236 > rfs4_lease_time))
3230 3237 return (TRUE);
3231 3238
3232 3239 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3233 3240 > rfs4_lease_time));
3234 3241 }
3235 3242
3236 3243 static bool_t
3237 3244 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3238 3245 {
3239 3246 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3240 3247 rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3241 3248 rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3242 3249
3243 3250 rfs4_dbe_hold(fp->rf_dbe);
3244 3251 rfs4_dbe_hold(oo->ro_dbe);
3245 3252 sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3246 3253 sp->rs_stateid.bits.type = OPENID;
3247 3254 sp->rs_owner = oo;
3248 3255 sp->rs_finfo = fp;
3249 3256
3250 3257 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3251 3258 offsetof(rfs4_lo_state_t, rls_node));
3252 3259
3253 3260 /* Insert state on per open owner's list */
3254 3261 rfs4_dbe_lock(oo->ro_dbe);
3255 3262 list_insert_tail(&oo->ro_statelist, sp);
3256 3263 rfs4_dbe_unlock(oo->ro_dbe);
3257 3264
3258 3265 return (TRUE);
3259 3266 }
3260 3267
3261 3268 static rfs4_state_t *
3262 3269 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3263 3270 {
3264 3271 rfs4_state_t *sp;
3265 3272 bool_t create = FALSE;
3266 3273 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3267 3274
3268 3275 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3269 3276 &create, NULL, find_invalid);
3270 3277 if (lock_fp == TRUE && sp != NULL)
3271 3278 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3272 3279
3273 3280 return (sp);
3274 3281 }
3275 3282
3276 3283 void
3277 3284 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3278 3285 cred_t *cr)
3279 3286 {
3280 3287 /* Remove the associated lo_state owners */
3281 3288 if (!lock_held)
3282 3289 rfs4_dbe_lock(sp->rs_dbe);
3283 3290
3284 3291 /*
3285 3292 * If refcnt == 0, the dbe is about to be destroyed.
3286 3293 * lock state will be released by the reaper thread.
3287 3294 */
3288 3295
3289 3296 if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3290 3297 if (sp->rs_closed == FALSE) {
3291 3298 rfs4_release_share_lock_state(sp, cr, close_of_client);
3292 3299 sp->rs_closed = TRUE;
3293 3300 }
3294 3301 }
3295 3302
3296 3303 if (!lock_held)
3297 3304 rfs4_dbe_unlock(sp->rs_dbe);
3298 3305 }
3299 3306
3300 3307 /*
3301 3308 * Remove all state associated with the given client.
3302 3309 */
3303 3310 void
3304 3311 rfs4_client_state_remove(rfs4_client_t *cp)
3305 3312 {
3306 3313 rfs4_openowner_t *oo;
3307 3314
3308 3315 rfs4_dbe_lock(cp->rc_dbe);
3309 3316
3310 3317 for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3311 3318 oo = list_next(&cp->rc_openownerlist, oo)) {
3312 3319 rfs4_free_opens(oo, TRUE, TRUE);
3313 3320 }
3314 3321
3315 3322 rfs4_dbe_unlock(cp->rc_dbe);
3316 3323 }
3317 3324
3318 3325 void
3319 3326 rfs4_client_close(rfs4_client_t *cp)
3320 3327 {
3321 3328 /* Mark client as going away. */
3322 3329 rfs4_dbe_lock(cp->rc_dbe);
3323 3330 rfs4_dbe_invalidate(cp->rc_dbe);
3324 3331 rfs4_dbe_unlock(cp->rc_dbe);
3325 3332
3326 3333 rfs4_client_state_remove(cp);
3327 3334
3328 3335 /* Release the client */
3329 3336 rfs4_client_rele(cp);
3330 3337 }
3331 3338
3332 3339 nfsstat4
3333 3340 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3334 3341 {
3335 3342 cid *cidp = (cid *) cp;
3336 3343 nfs4_srv_t *nsrv4;
3337 3344
3338 3345 nsrv4 = nfs4_get_srv();
3339 3346
3340 3347 /*
3341 3348 * If we are booted as a cluster node, check the embedded nodeid.
3342 3349 * If it indicates that this clientid was generated on another node,
3343 3350 * inform the client accordingly.
3344 3351 */
3345 3352 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3346 3353 return (NFS4ERR_STALE_CLIENTID);
3347 3354
3348 3355 /*
3349 3356 * If the server start time matches the time provided
3350 3357 * by the client (via the clientid) and this is NOT a
3351 3358 * setclientid_confirm then return EXPIRED.
3352 3359 */
3353 3360 if (!setclid_confirm &&
3354 3361 cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3355 3362 return (NFS4ERR_EXPIRED);
3356 3363
3357 3364 return (NFS4ERR_STALE_CLIENTID);
3358 3365 }
3359 3366
3360 3367 /*
3361 3368 * This is used when a stateid has not been found amongst the
3362 3369 * current server's state. Check the stateid to see if it
3363 3370 * was from this server instantiation or not.
3364 3371 */
3365 3372 static nfsstat4
3366 3373 what_stateid_error(stateid_t *id, stateid_type_t type)
3367 3374 {
3368 3375 nfs4_srv_t *nsrv4;
3369 3376
3370 3377 nsrv4 = nfs4_get_srv();
3371 3378
3372 3379 /* If we are booted as a cluster node, was stateid locally generated? */
3373 3380 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3374 3381 return (NFS4ERR_STALE_STATEID);
3375 3382
3376 3383 /* If types don't match then no use checking further */
3377 3384 if (type != id->bits.type)
3378 3385 return (NFS4ERR_BAD_STATEID);
3379 3386
3380 3387 /* From a different server instantiation, return STALE */
3381 3388 if (id->bits.boottime != nsrv4->rfs4_start_time)
3382 3389 return (NFS4ERR_STALE_STATEID);
3383 3390
3384 3391 /*
3385 3392 * From this server but the state is most likely beyond lease
3386 3393 * timeout: return NFS4ERR_EXPIRED. However, there is the
3387 3394 * case of a delegation stateid. For delegations, there is a
3388 3395 * case where the state can be removed without the client's
3389 3396 * knowledge/consent: revocation. In the case of delegation
3390 3397 * revocation, the delegation state will be removed and will
3391 3398 * not be found. If the client does something like a
3392 3399 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3393 3400 * that has been revoked, the server should return BAD_STATEID
3394 3401 * instead of the more common EXPIRED error.
3395 3402 */
3396 3403 if (id->bits.boottime == nsrv4->rfs4_start_time) {
3397 3404 if (type == DELEGID)
3398 3405 return (NFS4ERR_BAD_STATEID);
3399 3406 else
3400 3407 return (NFS4ERR_EXPIRED);
3401 3408 }
3402 3409
3403 3410 return (NFS4ERR_BAD_STATEID);
3404 3411 }
3405 3412
3406 3413 /*
3407 3414 * Used later on to find the various state structs. When called from
3408 3415 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3409 3416 * taken (it is not needed) and helps on the read/write path with
3410 3417 * respect to performance.
3411 3418 */
3412 3419 static nfsstat4
3413 3420 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3414 3421 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3415 3422 {
3416 3423 stateid_t *id = (stateid_t *)stateid;
3417 3424 rfs4_state_t *sp;
3418 3425
3419 3426 *spp = NULL;
3420 3427
3421 3428 /* If we are booted as a cluster node, was stateid locally generated? */
3422 3429 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3423 3430 return (NFS4ERR_STALE_STATEID);
3424 3431
3425 3432 sp = rfs4_findstate(id, find_invalid, lock_fp);
3426 3433 if (sp == NULL) {
3427 3434 return (what_stateid_error(id, OPENID));
3428 3435 }
3429 3436
3430 3437 if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3431 3438 if (lock_fp == TRUE)
3432 3439 rfs4_state_rele(sp);
3433 3440 else
3434 3441 rfs4_state_rele_nounlock(sp);
3435 3442 return (NFS4ERR_EXPIRED);
3436 3443 }
3437 3444
3438 3445 *spp = sp;
3439 3446
3440 3447 return (NFS4_OK);
3441 3448 }
3442 3449
3443 3450 nfsstat4
3444 3451 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3445 3452 rfs4_dbsearch_type_t find_invalid)
3446 3453 {
3447 3454 return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3448 3455 }
3449 3456
3450 3457 int
3451 3458 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid)
3452 3459 {
3453 3460 stateid_t *id = (stateid_t *)stateid;
3454 3461
3455 3462 if (rfs4_lease_expired(sp->rs_owner->ro_client))
3456 3463 return (NFS4_CHECK_STATEID_EXPIRED);
3457 3464
3458 3465 /* Stateid is some time in the future - that's bad */
3459 3466 if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3460 3467 return (NFS4_CHECK_STATEID_BAD);
3461 3468
3462 3469 if (sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1)
3463 3470 return (NFS4_CHECK_STATEID_REPLAY);
3464 3471
3465 3472 /* Stateid is some time in the past - that's old */
3466 3473 if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3467 3474 return (NFS4_CHECK_STATEID_OLD);
3468 3475
3469 3476 /* Caller needs to know about confirmation before closure */
3470 3477 if (sp->rs_owner->ro_need_confirm)
3471 3478 return (NFS4_CHECK_STATEID_UNCONFIRMED);
3472 3479
3473 3480 if (sp->rs_closed == TRUE)
3474 3481 return (NFS4_CHECK_STATEID_CLOSED);
3475 3482
3476 3483 return (NFS4_CHECK_STATEID_OKAY);
3477 3484 }
3478 3485
3479 3486 int
3480 3487 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid)
3481 3488 {
3482 3489 stateid_t *id = (stateid_t *)stateid;
3483 3490
3484 3491 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3485 3492 return (NFS4_CHECK_STATEID_EXPIRED);
3486 3493
3487 3494 /* Stateid is some time in the future - that's bad */
3488 3495 if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3489 3496 return (NFS4_CHECK_STATEID_BAD);
3490 3497
3491 3498 if (lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1)
3492 3499 return (NFS4_CHECK_STATEID_REPLAY);
3493 3500
3494 3501 /* Stateid is some time in the past - that's old */
3495 3502 if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3496 3503 return (NFS4_CHECK_STATEID_OLD);
3497 3504
3498 3505 if (lsp->rls_state->rs_closed == TRUE)
3499 3506 return (NFS4_CHECK_STATEID_CLOSED);
3500 3507
3501 3508 return (NFS4_CHECK_STATEID_OKAY);
3502 3509 }
3503 3510
3504 3511 nfsstat4
3505 3512 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3506 3513 {
3507 3514 stateid_t *id = (stateid_t *)stateid;
3508 3515 rfs4_deleg_state_t *dsp;
3509 3516
3510 3517 *dspp = NULL;
3511 3518
3512 3519 /* If we are booted as a cluster node, was stateid locally generated? */
3513 3520 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3514 3521 return (NFS4ERR_STALE_STATEID);
3515 3522
3516 3523 dsp = rfs4_finddelegstate(id);
3517 3524 if (dsp == NULL) {
3518 3525 return (what_stateid_error(id, DELEGID));
3519 3526 }
3520 3527
3521 3528 if (rfs4_lease_expired(dsp->rds_client)) {
3522 3529 rfs4_deleg_state_rele(dsp);
3523 3530 return (NFS4ERR_EXPIRED);
3524 3531 }
3525 3532
3526 3533 *dspp = dsp;
3527 3534
3528 3535 return (NFS4_OK);
3529 3536 }
3530 3537
3531 3538 nfsstat4
3532 3539 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3533 3540 {
3534 3541 stateid_t *id = (stateid_t *)stateid;
3535 3542 rfs4_lo_state_t *lsp;
3536 3543
3537 3544 *lspp = NULL;
3538 3545
3539 3546 /* If we are booted as a cluster node, was stateid locally generated? */
3540 3547 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3541 3548 return (NFS4ERR_STALE_STATEID);
3542 3549
3543 3550 lsp = rfs4_findlo_state(id, lock_fp);
3544 3551 if (lsp == NULL) {
3545 3552 return (what_stateid_error(id, LOCKID));
3546 3553 }
3547 3554
3548 3555 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3549 3556 rfs4_lo_state_rele(lsp, lock_fp);
3550 3557 return (NFS4ERR_EXPIRED);
3551 3558 }
3552 3559
3553 3560 *lspp = lsp;
3554 3561
3555 3562 return (NFS4_OK);
3556 3563 }
3557 3564
3558 3565 static nfsstat4
3559 3566 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3560 3567 rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3561 3568 {
3562 3569 rfs4_state_t *sp = NULL;
3563 3570 rfs4_deleg_state_t *dsp = NULL;
3564 3571 rfs4_lo_state_t *lsp = NULL;
3565 3572 stateid_t *id;
3566 3573 nfsstat4 status;
3567 3574
3568 3575 *spp = NULL; *dspp = NULL; *lspp = NULL;
3569 3576
3570 3577 id = (stateid_t *)sid;
3571 3578 switch (id->bits.type) {
3572 3579 case OPENID:
3573 3580 status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3574 3581 break;
3575 3582 case DELEGID:
3576 3583 status = rfs4_get_deleg_state(sid, &dsp);
3577 3584 break;
3578 3585 case LOCKID:
3579 3586 status = rfs4_get_lo_state(sid, &lsp, FALSE);
3580 3587 if (status == NFS4_OK) {
3581 3588 sp = lsp->rls_state;
3582 3589 rfs4_dbe_hold(sp->rs_dbe);
3583 3590 }
3584 3591 break;
3585 3592 default:
3586 3593 status = NFS4ERR_BAD_STATEID;
3587 3594 }
3588 3595
3589 3596 if (status == NFS4_OK) {
3590 3597 *spp = sp;
3591 3598 *dspp = dsp;
3592 3599 *lspp = lsp;
3593 3600 }
3594 3601
3595 3602 return (status);
3596 3603 }
3597 3604
3598 3605 /*
3599 3606 * Given the I/O mode (FREAD or FWRITE), this checks whether the
3600 3607 * rfs4_state_t struct has access to do this operation and if so
3601 3608 * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3602 3609 */
3603 3610 nfsstat4
3604 3611 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3605 3612 {
3606 3613 nfsstat4 stat = NFS4_OK;
3607 3614 rfs4_file_t *fp;
3608 3615 bool_t create = FALSE;
3609 3616
3610 3617 rfs4_dbe_lock(sp->rs_dbe);
3611 3618 if (mode == FWRITE) {
3612 3619 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3613 3620 stat = NFS4ERR_OPENMODE;
3614 3621 }
3615 3622 } else if (mode == FREAD) {
3616 3623 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3617 3624 /*
3618 3625 * If we have OPENed the file with DENYing access
3619 3626 * to both READ and WRITE then no one else could
3620 3627 * have OPENed the file, hence no conflicting READ
3621 3628 * deny. This check is merely an optimization.
3622 3629 */
3623 3630 if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3624 3631 goto out;
3625 3632
3626 3633 /* Check against file struct's DENY mode */
3627 3634 fp = rfs4_findfile(vp, NULL, &create);
3628 3635 if (fp != NULL) {
3629 3636 int deny_read = 0;
3630 3637 rfs4_dbe_lock(fp->rf_dbe);
3631 3638 /*
3632 3639 * Check if any other open owner has the file
3633 3640 * OPENed with deny READ.
3634 3641 */
3635 3642 if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3636 3643 deny_read = 1;
3637 3644 ASSERT(fp->rf_deny_read >= deny_read);
3638 3645 if (fp->rf_deny_read > deny_read)
3639 3646 stat = NFS4ERR_OPENMODE;
3640 3647 rfs4_dbe_unlock(fp->rf_dbe);
3641 3648 rfs4_file_rele(fp);
3642 3649 }
3643 3650 }
3644 3651 } else {
3645 3652 /* Illegal I/O mode */
3646 3653 stat = NFS4ERR_INVAL;
3647 3654 }
3648 3655 out:
3649 3656 rfs4_dbe_unlock(sp->rs_dbe);
3650 3657 return (stat);
3651 3658 }
3652 3659
3653 3660 /*
3654 3661 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3655 3662 * the file is being truncated, return NFS4_OK if allowed or appropriate
3656 3663 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3657 3664 * the associated file will be done if the I/O is not consistent with any
3658 3665 * delegation in effect on the file. Should be holding VOP_RWLOCK, either
3659 3666 * as reader or writer as appropriate. rfs4_op_open will acquire the
3660 3667 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
3661 3668 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3662 3669 * deleg parameter, we will return whether a write delegation is held by
3663 3670 * the client associated with this stateid.
3664 3671 * If the server instance associated with the relevant client is in its
3665 3672 * grace period, return NFS4ERR_GRACE.
3666 3673 */
3667 3674
3668 3675 nfsstat4
3669 3676 rfs4_check_stateid(int mode, vnode_t *vp,
3670 3677 stateid4 *stateid, bool_t trunc, bool_t *deleg,
3671 3678 bool_t do_access, caller_context_t *ct)
3672 3679 {
3673 3680 rfs4_file_t *fp;
3674 3681 bool_t create = FALSE;
3675 3682 rfs4_state_t *sp;
3676 3683 rfs4_deleg_state_t *dsp;
3677 3684 rfs4_lo_state_t *lsp;
3678 3685 stateid_t *id = (stateid_t *)stateid;
3679 3686 nfsstat4 stat = NFS4_OK;
3680 3687
3681 3688 if (ct != NULL) {
3682 3689 ct->cc_sysid = 0;
3683 3690 ct->cc_pid = 0;
3684 3691 ct->cc_caller_id = nfs4_srv_caller_id;
3685 3692 ct->cc_flags = CC_DONTBLOCK;
3686 3693 }
3687 3694
3688 3695 if (ISSPECIAL(stateid)) {
3689 3696 fp = rfs4_findfile(vp, NULL, &create);
3690 3697 if (fp == NULL)
3691 3698 return (NFS4_OK);
3692 3699 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
3693 3700 rfs4_file_rele(fp);
3694 3701 return (NFS4_OK);
3695 3702 }
3696 3703 if (mode == FWRITE ||
3697 3704 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
3698 3705 rfs4_recall_deleg(fp, trunc, NULL);
3699 3706 rfs4_file_rele(fp);
3700 3707 return (NFS4ERR_DELAY);
3701 3708 }
3702 3709 rfs4_file_rele(fp);
3703 3710 return (NFS4_OK);
3704 3711 } else {
3705 3712 stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
3706 3713 if (stat != NFS4_OK)
3707 3714 return (stat);
3708 3715 if (lsp != NULL) {
3709 3716 /* Is associated server instance in its grace period? */
3710 3717 if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
3711 3718 rfs4_lo_state_rele(lsp, FALSE);
3712 3719 if (sp != NULL)
3713 3720 rfs4_state_rele_nounlock(sp);
3714 3721 return (NFS4ERR_GRACE);
3715 3722 }
3716 3723 if (id->bits.type == LOCKID) {
3717 3724 /* Seqid in the future? - that's bad */
3718 3725 if (lsp->rls_lockid.bits.chgseq <
3719 3726 id->bits.chgseq) {
3720 3727 rfs4_lo_state_rele(lsp, FALSE);
3721 3728 if (sp != NULL)
3722 3729 rfs4_state_rele_nounlock(sp);
3723 3730 return (NFS4ERR_BAD_STATEID);
3724 3731 }
3725 3732 /* Seqid in the past? - that's old */
3726 3733 if (lsp->rls_lockid.bits.chgseq >
3727 3734 id->bits.chgseq) {
3728 3735 rfs4_lo_state_rele(lsp, FALSE);
3729 3736 if (sp != NULL)
3730 3737 rfs4_state_rele_nounlock(sp);
3731 3738 return (NFS4ERR_OLD_STATEID);
3732 3739 }
3733 3740 /* Ensure specified filehandle matches */
3734 3741 if (lsp->rls_state->rs_finfo->rf_vp != vp) {
3735 3742 rfs4_lo_state_rele(lsp, FALSE);
3736 3743 if (sp != NULL)
3737 3744 rfs4_state_rele_nounlock(sp);
3738 3745 return (NFS4ERR_BAD_STATEID);
3739 3746 }
3740 3747 }
3741 3748 if (ct != NULL) {
3742 3749 ct->cc_sysid =
3743 3750 lsp->rls_locker->rl_client->rc_sysidt;
3744 3751 ct->cc_pid = lsp->rls_locker->rl_pid;
3745 3752 }
3746 3753 rfs4_lo_state_rele(lsp, FALSE);
3747 3754 }
3748 3755
3749 3756 /* Stateid provided was an "open" stateid */
3750 3757 if (sp != NULL) {
3751 3758 /* Is associated server instance in its grace period? */
3752 3759 if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
3753 3760 rfs4_state_rele_nounlock(sp);
3754 3761 return (NFS4ERR_GRACE);
3755 3762 }
3756 3763 if (id->bits.type == OPENID) {
3757 3764 /* Seqid in the future? - that's bad */
3758 3765 if (sp->rs_stateid.bits.chgseq <
3759 3766 id->bits.chgseq) {
3760 3767 rfs4_state_rele_nounlock(sp);
3761 3768 return (NFS4ERR_BAD_STATEID);
3762 3769 }
3763 3770 /* Seqid in the past - that's old */
3764 3771 if (sp->rs_stateid.bits.chgseq >
3765 3772 id->bits.chgseq) {
3766 3773 rfs4_state_rele_nounlock(sp);
3767 3774 return (NFS4ERR_OLD_STATEID);
3768 3775 }
3769 3776 }
3770 3777 /* Ensure specified filehandle matches */
3771 3778 if (sp->rs_finfo->rf_vp != vp) {
3772 3779 rfs4_state_rele_nounlock(sp);
3773 3780 return (NFS4ERR_BAD_STATEID);
3774 3781 }
3775 3782
3776 3783 if (sp->rs_owner->ro_need_confirm) {
3777 3784 rfs4_state_rele_nounlock(sp);
3778 3785 return (NFS4ERR_BAD_STATEID);
3779 3786 }
3780 3787
3781 3788 if (sp->rs_closed == TRUE) {
3782 3789 rfs4_state_rele_nounlock(sp);
3783 3790 return (NFS4ERR_OLD_STATEID);
3784 3791 }
3785 3792
3786 3793 if (do_access)
3787 3794 stat = rfs4_state_has_access(sp, mode, vp);
3788 3795 else
3789 3796 stat = NFS4_OK;
3790 3797
3791 3798 /*
3792 3799 * Return whether this state has write
3793 3800 * delegation if desired
3794 3801 */
3795 3802 if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
3796 3803 OPEN_DELEGATE_WRITE))
3797 3804 *deleg = TRUE;
3798 3805
3799 3806 /*
3800 3807 * We got a valid stateid, so we update the
3801 3808 * lease on the client. Ideally we would like
3802 3809 * to do this after the calling op succeeds,
3803 3810 * but for now this will be good
3804 3811 * enough. Callers of this routine are
3805 3812 * currently insulated from the state stuff.
3806 3813 */
3807 3814 rfs4_update_lease(sp->rs_owner->ro_client);
3808 3815
3809 3816 /*
3810 3817 * If a delegation is present on this file and
3811 3818 * this is a WRITE, then update the lastwrite
3812 3819 * time to indicate that activity is present.
3813 3820 */
3814 3821 if (sp->rs_finfo->rf_dinfo.rd_dtype ==
3815 3822 OPEN_DELEGATE_WRITE &&
3816 3823 mode == FWRITE) {
3817 3824 sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
3818 3825 gethrestime_sec();
3819 3826 }
3820 3827
3821 3828 rfs4_state_rele_nounlock(sp);
3822 3829
3823 3830 return (stat);
3824 3831 }
3825 3832
3826 3833 if (dsp != NULL) {
3827 3834 /* Is associated server instance in its grace period? */
3828 3835 if (rfs4_clnt_in_grace(dsp->rds_client)) {
3829 3836 rfs4_deleg_state_rele(dsp);
3830 3837 return (NFS4ERR_GRACE);
3831 3838 }
3832 3839 if (dsp->rds_delegid.bits.chgseq != id->bits.chgseq) {
3833 3840 rfs4_deleg_state_rele(dsp);
3834 3841 return (NFS4ERR_BAD_STATEID);
3835 3842 }
3836 3843
3837 3844 /* Ensure specified filehandle matches */
3838 3845 if (dsp->rds_finfo->rf_vp != vp) {
3839 3846 rfs4_deleg_state_rele(dsp);
3840 3847 return (NFS4ERR_BAD_STATEID);
3841 3848 }
3842 3849 /*
3843 3850 * Return whether this state has write
3844 3851 * delegation if desired
3845 3852 */
3846 3853 if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3847 3854 OPEN_DELEGATE_WRITE))
3848 3855 *deleg = TRUE;
3849 3856
3850 3857 rfs4_update_lease(dsp->rds_client);
3851 3858
3852 3859 /*
3853 3860 * If a delegation is present on this file and
3854 3861 * this is a WRITE, then update the lastwrite
3855 3862 * time to indicate that activity is present.
3856 3863 */
3857 3864 if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3858 3865 OPEN_DELEGATE_WRITE && mode == FWRITE) {
3859 3866 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
3860 3867 gethrestime_sec();
3861 3868 }
3862 3869
3863 3870 /*
3864 3871 * XXX - what happens if this is a WRITE and the
3865 3872 * delegation type is for READ?
3866 3873 */
3867 3874 rfs4_deleg_state_rele(dsp);
3868 3875
3869 3876 return (stat);
3870 3877 }
3871 3878 /*
3872 3879 * If we got this far, something bad happened
3873 3880 */
3874 3881 return (NFS4ERR_BAD_STATEID);
3875 3882 }
3876 3883 }
3877 3884
3878 3885
3879 3886 /*
3880 3887 * This is a special function in that for the file struct provided the
3881 3888 * server wants to remove/close all current state associated with the
3882 3889 * file. The prime use of this would be with OP_REMOVE to force the
3883 3890 * release of state and particularly of file locks.
3884 3891 *
3885 3892 * There is an assumption that there are no delegations outstanding on
3886 3893 * this file at this point. The caller should have waited for those
3887 3894 * to be returned or revoked.
3888 3895 */
3889 3896 void
3890 3897 rfs4_close_all_state(rfs4_file_t *fp)
3891 3898 {
3892 3899 rfs4_state_t *sp;
3893 3900
3894 3901 rfs4_dbe_lock(fp->rf_dbe);
3895 3902
3896 3903 #ifdef DEBUG
3897 3904 /* only applies when server is handing out delegations */
3898 3905 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3899 3906 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3900 3907 #endif
3901 3908
3902 3909 /* The delegation state list for this file must be empty */
3903 3910 ASSERT(list_is_empty(&fp->rf_delegstatelist));
3904 3911
3905 3912 /* Invalidate the entry so that it can not be found */
3906 3913 rfs4_dbe_invalidate(fp->rf_dbe);
3907 3914
3908 3915 if (fp->rf_vp == NULL) {
3909 3916 rfs4_dbe_unlock(fp->rf_dbe);
3910 3917 return;
3911 3918 }
3912 3919 rfs4_dbe_unlock(fp->rf_dbe);
3913 3920
3914 3921 /*
3915 3922 * Hold as writer to prevent other server threads from
3916 3923 * processing requests related to the file while all state is
3917 3924 * being removed.
3918 3925 */
3919 3926 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
3920 3927
3921 3928 /* Close and release every state still found for the file */
3922 3929 while (sp = rfs4_findstate_by_file(fp)) {
3923 3930 rfs4_state_close(sp, FALSE, FALSE, CRED());
3924 3931 rfs4_state_rele_nounlock(sp);
3925 3932 }
3926 3933
3927 3934 /*
3928 3935 * Clear the vnode's nfs4_srv_vkey VSD and release the vnode. This
3929 3936 * is only safe since there are no further references to the file.
3930 3937 */
3931 3938 rfs4_dbe_lock(fp->rf_dbe);
3932 3939 if (fp->rf_vp) {
3933 3940 vnode_t *vp = fp->rf_vp;
3934 3941
3935 3942 mutex_enter(&vp->v_vsd_lock);
3936 3943 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
3937 3944 mutex_exit(&vp->v_vsd_lock);
3938 3945 VN_RELE(vp);
3939 3946 fp->rf_vp = NULL;
3940 3947 }
3941 3948 rfs4_dbe_unlock(fp->rf_dbe);
3942 3949
3943 3950 /* Finally let other references proceed */
3944 3951 rw_exit(&fp->rf_file_rwlock);
3945 3952 }
3946 3953
3947 3954 /*
3948 3955 * This function is used as a target for the rfs4_dbe_walk() call
3949 3956 * below. The purpose of this function is to see if the
3950 3957 * lockowner_state refers to a file that resides within the exportinfo
3951 3958 * export. If so, then remove the lock_owner state (file locks and
3952 3959 * share "locks") for this object since the intent is the server is
3953 3960 * unexporting the specified directory. Be sure to invalidate the
3954 3961 * object and its rls_state after the state has been released.
3955 3962 */
3956 3963 static void
3957 3964 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
3958 3965 {
3959 3966 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
3960 3967 struct exportinfo *exi = (struct exportinfo *)e;
3961 3968 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
3962 3969 fhandle_t *efhp;
3963 3970
3964 3971 efhp = (fhandle_t *)&exi->exi_fh;
3965 3972 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3966 3973
3967 3974 FH_TO_FMT4(efhp, exi_fhp);
3968 3975
3969 3976 finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
3970 3977 rf_filehandle.nfs_fh4_val;
3971 3978
3972 3979 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
3973 3980 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
3974 3981 exi_fhp->fh4_xlen) == 0) {
3975 3982 rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
3976 3983 rfs4_dbe_invalidate(lsp->rls_dbe);
3977 3984 rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
3978 3985 }
3979 3986 }
3980 3987
3981 3988 /*
3982 3989 * This function is used as a target for the rfs4_dbe_walk() call
3983 3990 * below. The purpose of this function is to see if the state refers
3984 3991 * to a file that resides within the exportinfo export. If so, then
3985 3992 * remove the open state for this object since the intent is the
3986 3993 * server is unexporting the specified directory. The main result for
3987 3994 * this type of entry is to invalidate it such that it will not be
3988 3995 * found in the future.
3989 3996 */
3990 3997 static void
3991 3998 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
3992 3999 {
3993 4000 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3994 4001 struct exportinfo *exi = (struct exportinfo *)e;
3995 4002 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
3996 4003 fhandle_t *efhp;
3997 4004
3998 4005 efhp = (fhandle_t *)&exi->exi_fh;
3999 4006 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4000 4007
4001 4008 FH_TO_FMT4(efhp, exi_fhp);
4002 4009
4003 4010 finfo_fhp =
4004 4011 (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
4005 4012
4006 4013 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4007 4014 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4008 4015 exi_fhp->fh4_xlen) == 0) {
4009 4016 rfs4_state_close(sp, TRUE, FALSE, CRED());
4010 4017 rfs4_dbe_invalidate(sp->rs_dbe);
4011 4018 }
4012 4019 }
4013 4020
4014 4021 /*
4015 4022 * This function is used as a target for the rfs4_dbe_walk() call
4016 4023 * below. The purpose of this function is to see if the state refers
4017 4024 * to a file that resides within the exportinfo export. If so, then
4018 4025 * remove the deleg state for this object since the intent is the
4019 4026 * server is unexporting the specified directory. The only action taken
4020 4027 * for this type of entry is to invalidate it such that it will not be
4021 4028 * found in the future.
4022 4029 */
4023 4030 static void
4024 4031 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
4025 4032 {
4026 4033 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
4027 4034 struct exportinfo *exi = (struct exportinfo *)e;
4028 4035 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4029 4036 fhandle_t *efhp;
4030 4037
4031 4038 efhp = (fhandle_t *)&exi->exi_fh;
4032 4039 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4033 4040
4034 4041 FH_TO_FMT4(efhp, exi_fhp);
4035 4042
4036 4043 finfo_fhp =
4037 4044 (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
4038 4045
4039 4046 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4040 4047 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4041 4048 exi_fhp->fh4_xlen) == 0) {
4042 4049 rfs4_dbe_invalidate(dsp->rds_dbe);
4043 4050 }
4044 4051 }
4045 4052
4046 4053 /*
4047 4054 * This function is used as a target for the rfs4_dbe_walk() call
4048 4055 * below. The purpose of this function is to see if the state refers
4049 4056 * to a file that resides within the exportinfo export. If so, then
4050 4057 * release the vnode hold for this object since the intent is the server
4051 4058 * is unexporting the specified directory. Invalidation will prevent
4052 4059 * this struct from being found in the future.
4053 4060 */
4054 4061 static void
4055 4062 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
4056 4063 {
4057 4064 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
4058 4065 struct exportinfo *exi = (struct exportinfo *)e;
4059 4066 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4060 4067 fhandle_t *efhp;
4061 4068
4062 4069 efhp = (fhandle_t *)&exi->exi_fh;
4063 4070 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4064 4071
4065 4072 FH_TO_FMT4(efhp, exi_fhp);
4066 4073
4067 4074 finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
4068 4075
4069 4076 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4070 4077 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4071 4078 exi_fhp->fh4_xlen) == 0) {
4072 4079 if (fp->rf_vp) {
4073 4080 vnode_t *vp = fp->rf_vp;
4074 4081
4075 4082 /*
4076 4083 * Don't leak monitors, and remove the reference
4077 4084 * put on the vnode when the delegation was granted.
4078 4085 */
4079 4086 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
4080 4087 (void) fem_uninstall(vp, deleg_rdops,
4081 4088 (void *)fp);
4082 4089 vn_open_downgrade(vp, FREAD);
4083 4090 } else if (fp->rf_dinfo.rd_dtype ==
4084 4091 OPEN_DELEGATE_WRITE) {
4085 4092 (void) fem_uninstall(vp, deleg_wrops,
4086 4093 (void *)fp);
4087 4094 vn_open_downgrade(vp, FREAD|FWRITE);
4088 4095 }
4089 4096 mutex_enter(&vp->v_vsd_lock);
4090 4097 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4091 4098 mutex_exit(&vp->v_vsd_lock);
4092 4099 VN_RELE(vp);
4093 4100 fp->rf_vp = NULL;
4094 4101 }
4095 4102 rfs4_dbe_invalidate(fp->rf_dbe);
4096 4103 }
4097 4104 }
4098 4105
4099 4106 /*
4100 4107 * Given a directory that is being unexported, clean up/release all
4101 4108 * state in the server that refers to objects residing underneath this
4102 4109 * particular export. The ordering of the release is important:
4103 4110 * lock-owner state, then open state, then delegation state, then file.
4104 4111 *
4105 4112 * NFS zones note: nfs_export.c:unexport() calls this from a
4106 4113 * thread in the global zone for NGZ data structures, so we
4107 4114 * CANNOT use zone_getspecific anywhere in this code path.
4108 4115 */
4109 4116 void
4110 4117 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4111 4118 {
4112 4119 nfs_globals_t *ng;
4113 4120 nfs4_srv_t *nsrv4;
4114 4121
4115 4122 ng = ne->ne_globals;
|
↓ open down ↓ |
2566 lines elided |
↑ open up ↑ |
4116 4123 ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
4117 4124 nsrv4 = ng->nfs4_srv;
4118 4125
4119 4126 mutex_enter(&nsrv4->state_lock);
4120 4127
4121 4128 if (nsrv4->nfs4_server_state == NULL) {
4122 4129 mutex_exit(&nsrv4->state_lock);
4123 4130 return;
4124 4131 }
4125 4132
4126 - /* CSTYLED */
4127 - rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4133 + rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
4134 + rfs4_lo_state_walk_callout, exi);
4128 4135 rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4129 - /* CSTYLED */
4130 - rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4136 + rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
4137 + rfs4_deleg_state_walk_callout, exi);
4131 4138 rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4132 4139
4133 4140 mutex_exit(&nsrv4->state_lock);
4134 4141 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX