1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * Portions of this source code were derived from Berkeley 4.3 BSD
31 * under license from the Regents of the University of California.
32 */
33
34 #ifndef _NFS4_CLNT_H
35 #define _NFS4_CLNT_H
36
37 #include <sys/errno.h>
38 #include <sys/types.h>
39 #include <sys/kstat.h>
40 #include <sys/time.h>
41 #include <sys/flock.h>
42 #include <vm/page.h>
43 #include <nfs/nfs4_kprot.h>
44 #include <nfs/nfs4.h>
45 #include <nfs/rnode.h>
46 #include <sys/avl.h>
47 #include <sys/list.h>
48 #include <rpc/auth.h>
49 #include <sys/door.h>
50 #include <sys/condvar_impl.h>
51 #include <sys/zone.h>
52
53 #ifdef __cplusplus
54 extern "C" {
55 #endif
56
57 #define NFS4_SIZE_OK(size) ((size) <= MAXOFFSET_T)
58
59 /* Four states of nfs4_server's lease_valid */
60 #define NFS4_LEASE_INVALID 0
61 #define NFS4_LEASE_VALID 1
62 #define NFS4_LEASE_UNINITIALIZED 2
63 #define NFS4_LEASE_NOT_STARTED 3
64
65 /* flag to tell the renew thread it should exit */
66 #define NFS4_THREAD_EXIT 1
67
68 /* Default number of seconds to wait on GRACE and DELAY errors */
69 #define NFS4ERR_DELAY_TIME 10
70
71 /* Number of hash buckets for open owners for each nfs4_server */
72 #define NFS4_NUM_OO_BUCKETS 53
73
74 /* Number of freed open owners (per mntinfo4_t) to keep around */
75 #define NFS4_NUM_FREED_OPEN_OWNERS 8
76
77 /* Number of seconds to wait before retrying a SETCLIENTID(_CONFIRM) op */
78 #define NFS4_RETRY_SCLID_DELAY 10
79
80 /* Number of times we should retry a SETCLIENTID(_CONFIRM) op */
81 #define NFS4_NUM_SCLID_RETRIES 3
82
83 /* Number of times we should retry on open after getting NFS4ERR_BAD_SEQID */
84 #define NFS4_NUM_RETRY_BAD_SEQID 3
85
86 /*
87 * Macro to wakeup sleeping async worker threads.
88 */
89 #define NFS4_WAKE_ASYNC_WORKER(work_cv) { \
90 if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_QUEUE])) \
91 cv_signal(&work_cv[NFS4_ASYNC_QUEUE]); \
92 else if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_PGOPS_QUEUE])) \
93 cv_signal(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]); \
94 }
95
96 #define NFS4_WAKEALL_ASYNC_WORKERS(work_cv) { \
97 cv_broadcast(&work_cv[NFS4_ASYNC_QUEUE]); \
98 cv_broadcast(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]); \
99 }
100
101 /*
102 * Is the attribute cache valid? If client holds a delegation, then attrs
103 * are by definition valid. If not, then check to see if attrs have timed out.
104 */
105 #define ATTRCACHE4_VALID(vp) (VTOR4(vp)->r_deleg_type != OPEN_DELEGATE_NONE || \
106 gethrtime() < VTOR4(vp)->r_time_attr_inval)
107
108 /*
109 * Flags to indicate whether to purge the DNLC for non-directory vnodes
110 * in a call to nfs_purge_caches.
111 */
112 #define NFS4_NOPURGE_DNLC 0
113 #define NFS4_PURGE_DNLC 1
114
115 /*
116 * Is cache valid?
117 * Swap is always valid, if no attributes (attrtime == 0) or
118 * if mtime matches cached mtime it is valid
119 * NOTE: mtime is now a timestruc_t.
120 * Caller should be holding the rnode r_statelock mutex.
121 */
122 #define CACHE4_VALID(rp, mtime, fsize) \
123 ((RTOV4(rp)->v_flag & VISSWAP) == VISSWAP || \
124 (((mtime).tv_sec == (rp)->r_attr.va_mtime.tv_sec && \
125 (mtime).tv_nsec == (rp)->r_attr.va_mtime.tv_nsec) && \
126 ((fsize) == (rp)->r_attr.va_size)))
127
128 /*
129 * Macro to detect forced unmount or a zone shutdown.
130 */
131 #define FS_OR_ZONE_GONE4(vfsp) \
132 (((vfsp)->vfs_flag & VFS_UNMOUNTED) || \
133 zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
134
135 /*
136 * Macro to help determine whether a request failed because the underlying
137 * filesystem has been forcibly unmounted or because of zone shutdown.
138 */
139 #define NFS4_FRC_UNMT_ERR(err, vfsp) \
140 ((err) == EIO && FS_OR_ZONE_GONE4((vfsp)))
141
142 /*
143 * Due to the way the address space callbacks are used to execute a delmap,
144 * we must keep track of how many times the same thread has called
145 * VOP_DELMAP()->nfs4_delmap(). This is done by having a list of
146 * nfs4_delmapcall_t's associated with each rnode4_t. This list is protected
147 * by the rnode4_t's r_statelock. The individual elements do not need to be
148 * protected as they will only ever be created, modified and destroyed by
149 * one thread (the call_id).
150 * See nfs4_delmap() for further explanation.
151 */
152 typedef struct nfs4_delmapcall {
153 kthread_t *call_id;
154 int error; /* error from delmap */
155 list_node_t call_node;
156 } nfs4_delmapcall_t;
157
158 /*
159 * delmap address space callback args
160 */
161 typedef struct nfs4_delmap_args {
162 vnode_t *vp;
163 offset_t off;
164 caddr_t addr;
165 size_t len;
166 uint_t prot;
167 uint_t maxprot;
168 uint_t flags;
169 cred_t *cr;
170 nfs4_delmapcall_t *caller; /* to retrieve errors from the cb */
171 } nfs4_delmap_args_t;
172
173 /*
174 * client side statistics
175 */
176 /*
177 * Per-zone counters
178 */
179 struct clstat4 {
180 kstat_named_t calls; /* client requests */
181 kstat_named_t badcalls; /* rpc failures */
182 kstat_named_t referrals; /* referrals */
183 kstat_named_t referlinks; /* referrals as symlinks */
184 kstat_named_t clgets; /* client handle gets */
185 kstat_named_t cltoomany; /* client handle cache misses */
186 #ifdef DEBUG
187 kstat_named_t clalloc; /* number of client handles */
188 kstat_named_t noresponse; /* server not responding cnt */
189 kstat_named_t failover; /* server failover count */
190 kstat_named_t remap; /* server remap count */
191 #endif
192 };
193
194 #ifdef DEBUG
195 /*
196 * The following are statistics that describe the behavior of the system as a
197 * whole and don't correspond to any particular zone.
198 */
199 struct clstat4_debug {
200 kstat_named_t nrnode; /* number of allocated rnodes */
201 kstat_named_t access; /* size of access cache */
202 kstat_named_t dirent; /* size of readdir cache */
203 kstat_named_t dirents; /* size of readdir buf cache */
204 kstat_named_t reclaim; /* number of reclaims */
205 kstat_named_t clreclaim; /* number of cl reclaims */
206 kstat_named_t f_reclaim; /* number of free reclaims */
207 kstat_named_t a_reclaim; /* number of active reclaims */
208 kstat_named_t r_reclaim; /* number of rnode reclaims */
209 kstat_named_t rpath; /* bytes used to store rpaths */
210 };
211 extern struct clstat4_debug clstat4_debug;
212
213 #endif
214
215 /*
216 * The NFS specific async_reqs structure. iotype4 is grouped to support two
217 * types of async thread pools, please read comments section of mntinfo4_t
218 * definition for more information. Care should be taken while adding new
219 * members to this group.
220 */
221
222 enum iotype4 {
223 NFS4_PUTAPAGE,
224 NFS4_PAGEIO,
225 NFS4_COMMIT,
226 NFS4_READ_AHEAD,
227 NFS4_READDIR,
228 NFS4_INACTIVE,
229 NFS4_ASYNC_TYPES
230 };
231 #define NFS4_ASYNC_PGOPS_TYPES (NFS4_COMMIT + 1)
232
233 /*
234 * NFS async requests queue type.
235 */
236 enum ioqtype4 {
237 NFS4_ASYNC_QUEUE,
238 NFS4_ASYNC_PGOPS_QUEUE,
239 NFS4_MAX_ASYNC_QUEUES
240 };
241
242 /*
243 * Number of NFS async threads operating exclusively on page op requests.
244 */
245 #define NUM_ASYNC_PGOPS_THREADS 0x2
246
247 struct nfs4_async_read_req {
248 void (*readahead)(); /* pointer to readahead function */
249 u_offset_t blkoff; /* offset in file */
250 struct seg *seg; /* segment to do i/o to */
251 caddr_t addr; /* address to do i/o to */
252 };
253
254 struct nfs4_pageio_req {
255 int (*pageio)(); /* pointer to pageio function */
256 page_t *pp; /* page list */
257 u_offset_t io_off; /* offset in file */
258 uint_t io_len; /* size of request */
259 int flags;
260 };
261
262 struct nfs4_readdir_req {
263 int (*readdir)(); /* pointer to readdir function */
264 struct rddir4_cache *rdc; /* pointer to cache entry to fill */
265 };
266
267 struct nfs4_commit_req {
268 void (*commit)(); /* pointer to commit function */
269 page_t *plist; /* page list */
270 offset4 offset; /* starting offset */
271 count4 count; /* size of range to be commited */
272 };
273
274 struct nfs4_async_reqs {
275 struct nfs4_async_reqs *a_next; /* pointer to next arg struct */
276 #ifdef DEBUG
277 kthread_t *a_queuer; /* thread id of queueing thread */
278 #endif
279 struct vnode *a_vp; /* vnode pointer */
280 struct cred *a_cred; /* cred pointer */
281 enum iotype4 a_io; /* i/o type */
282 union {
283 struct nfs4_async_read_req a_read_args;
284 struct nfs4_pageio_req a_pageio_args;
285 struct nfs4_readdir_req a_readdir_args;
286 struct nfs4_commit_req a_commit_args;
287 } a_args;
288 };
289
290 #define a_nfs4_readahead a_args.a_read_args.readahead
291 #define a_nfs4_blkoff a_args.a_read_args.blkoff
292 #define a_nfs4_seg a_args.a_read_args.seg
293 #define a_nfs4_addr a_args.a_read_args.addr
294
295 #define a_nfs4_putapage a_args.a_pageio_args.pageio
296 #define a_nfs4_pageio a_args.a_pageio_args.pageio
297 #define a_nfs4_pp a_args.a_pageio_args.pp
298 #define a_nfs4_off a_args.a_pageio_args.io_off
299 #define a_nfs4_len a_args.a_pageio_args.io_len
300 #define a_nfs4_flags a_args.a_pageio_args.flags
301
302 #define a_nfs4_readdir a_args.a_readdir_args.readdir
303 #define a_nfs4_rdc a_args.a_readdir_args.rdc
304
305 #define a_nfs4_commit a_args.a_commit_args.commit
306 #define a_nfs4_plist a_args.a_commit_args.plist
307 #define a_nfs4_offset a_args.a_commit_args.offset
308 #define a_nfs4_count a_args.a_commit_args.count
309
310 /*
311 * Security information
312 */
313 typedef struct sv_secinfo {
314 uint_t count; /* how many sdata there are */
315 uint_t index; /* which sdata[index] */
316 struct sec_data *sdata;
317 } sv_secinfo_t;
318
319 /*
320 * Hash bucket for the mi's open owner list (mi_oo_list).
321 */
322 typedef struct nfs4_oo_hash_bucket {
323 list_t b_oo_hash_list;
324 kmutex_t b_lock;
325 } nfs4_oo_hash_bucket_t;
326
327 /*
328 * Global array of ctags.
329 */
330 extern ctag_t nfs4_ctags[];
331
332 typedef enum nfs4_tag_type {
333 TAG_NONE,
334 TAG_ACCESS,
335 TAG_CLOSE,
336 TAG_CLOSE_LOST,
337 TAG_CLOSE_UNDO,
338 TAG_COMMIT,
339 TAG_DELEGRETURN,
340 TAG_FSINFO,
341 TAG_GET_SYMLINK,
342 TAG_GETATTR,
343 TAG_GETATTR_FSLOCATION,
344 TAG_INACTIVE,
345 TAG_LINK,
346 TAG_LOCK,
347 TAG_LOCK_RECLAIM,
348 TAG_LOCK_RESEND,
349 TAG_LOCK_REINSTATE,
350 TAG_LOCK_UNKNOWN,
351 TAG_LOCKT,
352 TAG_LOCKU,
353 TAG_LOCKU_RESEND,
354 TAG_LOCKU_REINSTATE,
355 TAG_LOOKUP,
356 TAG_LOOKUP_PARENT,
357 TAG_LOOKUP_VALID,
358 TAG_LOOKUP_VPARENT,
359 TAG_MKDIR,
360 TAG_MKNOD,
361 TAG_MOUNT,
362 TAG_OPEN,
363 TAG_OPEN_CONFIRM,
364 TAG_OPEN_CONFIRM_LOST,
365 TAG_OPEN_DG,
366 TAG_OPEN_DG_LOST,
367 TAG_OPEN_LOST,
368 TAG_OPENATTR,
369 TAG_PATHCONF,
370 TAG_PUTROOTFH,
371 TAG_READ,
372 TAG_READAHEAD,
373 TAG_READDIR,
374 TAG_READLINK,
375 TAG_RELOCK,
376 TAG_REMAP_LOOKUP,
377 TAG_REMAP_LOOKUP_AD,
378 TAG_REMAP_LOOKUP_NA,
379 TAG_REMAP_MOUNT,
380 TAG_RMDIR,
381 TAG_REMOVE,
382 TAG_RENAME,
383 TAG_RENAME_VFH,
384 TAG_RENEW,
385 TAG_REOPEN,
386 TAG_REOPEN_LOST,
387 TAG_SECINFO,
388 TAG_SETATTR,
389 TAG_SETCLIENTID,
390 TAG_SETCLIENTID_CF,
391 TAG_SYMLINK,
392 TAG_WRITE
393 } nfs4_tag_type_t;
394
395 #define NFS4_TAG_INITIALIZER { \
396 {TAG_NONE, "", \
397 {0x20202020, 0x20202020, 0x20202020}}, \
398 {TAG_ACCESS, "access", \
399 {0x61636365, 0x73732020, 0x20202020}}, \
400 {TAG_CLOSE, "close", \
401 {0x636c6f73, 0x65202020, 0x20202020}}, \
402 {TAG_CLOSE_LOST, "lost close", \
403 {0x6c6f7374, 0x20636c6f, 0x73652020}}, \
404 {TAG_CLOSE_UNDO, "undo close", \
405 {0x756e646f, 0x20636c6f, 0x73652020}}, \
406 {TAG_COMMIT, "commit", \
407 {0x636f6d6d, 0x69742020, 0x20202020}}, \
408 {TAG_DELEGRETURN, "delegreturn", \
409 {0x64656c65, 0x67726574, 0x75726e20}}, \
410 {TAG_FSINFO, "fsinfo", \
411 {0x6673696e, 0x666f2020, 0x20202020}}, \
412 {TAG_GET_SYMLINK, "get symlink text", \
413 {0x67657420, 0x736c6e6b, 0x20747874}}, \
414 {TAG_GETATTR, "getattr", \
415 {0x67657461, 0x74747220, 0x20202020}}, \
416 {TAG_GETATTR_FSLOCATION, "getattr fslocation", \
417 {0x67657461, 0x74747220, 0x66736c6f}}, \
418 {TAG_INACTIVE, "inactive", \
419 {0x696e6163, 0x74697665, 0x20202020}}, \
420 {TAG_LINK, "link", \
421 {0x6c696e6b, 0x20202020, 0x20202020}}, \
422 {TAG_LOCK, "lock", \
423 {0x6c6f636b, 0x20202020, 0x20202020}}, \
424 {TAG_LOCK_RECLAIM, "reclaim lock", \
425 {0x7265636c, 0x61696d20, 0x6c6f636b}}, \
426 {TAG_LOCK_RESEND, "resend lock", \
427 {0x72657365, 0x6e64206c, 0x6f636b20}}, \
428 {TAG_LOCK_REINSTATE, "reinstate lock", \
429 {0x7265696e, 0x7374206c, 0x6f636b20}}, \
430 {TAG_LOCK_UNKNOWN, "unknown lock", \
431 {0x756e6b6e, 0x6f776e20, 0x6c6f636b}}, \
432 {TAG_LOCKT, "lock test", \
433 {0x6c6f636b, 0x5f746573, 0x74202020}}, \
434 {TAG_LOCKU, "unlock", \
435 {0x756e6c6f, 0x636b2020, 0x20202020}}, \
436 {TAG_LOCKU_RESEND, "resend locku", \
437 {0x72657365, 0x6e64206c, 0x6f636b75}}, \
438 {TAG_LOCKU_REINSTATE, "reinstate unlock", \
439 {0x7265696e, 0x73742075, 0x6e6c636b}}, \
440 {TAG_LOOKUP, "lookup", \
441 {0x6c6f6f6b, 0x75702020, 0x20202020}}, \
442 {TAG_LOOKUP_PARENT, "lookup parent", \
443 {0x6c6f6f6b, 0x75702070, 0x6172656e}}, \
444 {TAG_LOOKUP_VALID, "lookup valid", \
445 {0x6c6f6f6b, 0x75702076, 0x616c6964}}, \
446 {TAG_LOOKUP_VPARENT, "lookup valid parent", \
447 {0x6c6f6f6b, 0x766c6420, 0x7061726e}}, \
448 {TAG_MKDIR, "mkdir", \
449 {0x6d6b6469, 0x72202020, 0x20202020}}, \
450 {TAG_MKNOD, "mknod", \
451 {0x6d6b6e6f, 0x64202020, 0x20202020}}, \
452 {TAG_MOUNT, "mount", \
453 {0x6d6f756e, 0x74202020, 0x20202020}}, \
454 {TAG_OPEN, "open", \
455 {0x6f70656e, 0x20202020, 0x20202020}}, \
456 {TAG_OPEN_CONFIRM, "open confirm", \
457 {0x6f70656e, 0x5f636f6e, 0x6669726d}}, \
458 {TAG_OPEN_CONFIRM_LOST, "lost open confirm", \
459 {0x6c6f7374, 0x206f7065, 0x6e5f636f}}, \
460 {TAG_OPEN_DG, "open downgrade", \
461 {0x6f70656e, 0x20646772, 0x61646520}}, \
462 {TAG_OPEN_DG_LOST, "lost open downgrade", \
463 {0x6c737420, 0x6f70656e, 0x20646772}}, \
464 {TAG_OPEN_LOST, "lost open", \
465 {0x6c6f7374, 0x206f7065, 0x6e202020}}, \
466 {TAG_OPENATTR, "openattr", \
467 {0x6f70656e, 0x61747472, 0x20202020}}, \
468 {TAG_PATHCONF, "pathconf", \
469 {0x70617468, 0x636f6e66, 0x20202020}}, \
470 {TAG_PUTROOTFH, "putrootfh", \
471 {0x70757472, 0x6f6f7466, 0x68202020}}, \
472 {TAG_READ, "read", \
473 {0x72656164, 0x20202020, 0x20202020}}, \
474 {TAG_READAHEAD, "readahead", \
475 {0x72656164, 0x61686561, 0x64202020}}, \
476 {TAG_READDIR, "readdir", \
477 {0x72656164, 0x64697220, 0x20202020}}, \
478 {TAG_READLINK, "readlink", \
479 {0x72656164, 0x6c696e6b, 0x20202020}}, \
480 {TAG_RELOCK, "relock", \
481 {0x72656c6f, 0x636b2020, 0x20202020}}, \
482 {TAG_REMAP_LOOKUP, "remap lookup", \
483 {0x72656d61, 0x70206c6f, 0x6f6b7570}}, \
484 {TAG_REMAP_LOOKUP_AD, "remap lookup attr dir", \
485 {0x72656d70, 0x206c6b75, 0x70206164}}, \
486 {TAG_REMAP_LOOKUP_NA, "remap lookup named attrs", \
487 {0x72656d70, 0x206c6b75, 0x70206e61}}, \
488 {TAG_REMAP_MOUNT, "remap mount", \
489 {0x72656d61, 0x70206d6f, 0x756e7420}}, \
490 {TAG_RMDIR, "rmdir", \
491 {0x726d6469, 0x72202020, 0x20202020}}, \
492 {TAG_REMOVE, "remove", \
493 {0x72656d6f, 0x76652020, 0x20202020}}, \
494 {TAG_RENAME, "rename", \
495 {0x72656e61, 0x6d652020, 0x20202020}}, \
496 {TAG_RENAME_VFH, "rename volatile fh", \
497 {0x72656e61, 0x6d652028, 0x76666829}}, \
498 {TAG_RENEW, "renew", \
499 {0x72656e65, 0x77202020, 0x20202020}}, \
500 {TAG_REOPEN, "reopen", \
501 {0x72656f70, 0x656e2020, 0x20202020}}, \
502 {TAG_REOPEN_LOST, "lost reopen", \
503 {0x6c6f7374, 0x2072656f, 0x70656e20}}, \
504 {TAG_SECINFO, "secinfo", \
505 {0x73656369, 0x6e666f20, 0x20202020}}, \
506 {TAG_SETATTR, "setattr", \
507 {0x73657461, 0x74747220, 0x20202020}}, \
508 {TAG_SETCLIENTID, "setclientid", \
509 {0x73657463, 0x6c69656e, 0x74696420}}, \
510 {TAG_SETCLIENTID_CF, "setclientid_confirm", \
511 {0x73636c6e, 0x7469645f, 0x636f6e66}}, \
512 {TAG_SYMLINK, "symlink", \
513 {0x73796d6c, 0x696e6b20, 0x20202020}}, \
514 {TAG_WRITE, "write", \
515 {0x77726974, 0x65202020, 0x20202020}} \
516 }
517
518 /*
519 * These flags are for differentiating the search criterian for
520 * find_open_owner(). The comparison is done with the open_owners's
521 * 'oo_just_created' flag.
522 */
523 #define NFS4_PERM_CREATED 0x0
524 #define NFS4_JUST_CREATED 0x1
525
526 /*
527 * Hashed by the cr_uid and cr_ruid of credential 'oo_cred'. 'oo_cred_otw'
528 * is stored upon a successful OPEN. This is needed when the user's effective
529 * and real uid's don't match. The 'oo_cred_otw' overrides the credential
530 * passed down by VFS for async read/write, commit, lock, and close operations.
531 *
532 * The oo_ref_count keeps track the number of active references on this
533 * data structure + number of nfs4_open_streams point to this structure.
534 *
535 * 'oo_valid' tells whether this stuct is about to be freed or not.
536 *
537 * 'oo_just_created' tells us whether this struct has just been created but
538 * not been fully finalized (that is created upon an OPEN request and
539 * finalized upon the OPEN success).
540 *
541 * The 'oo_seqid_inuse' is for the open seqid synchronization. If a thread
542 * is currently using the open owner and it's open_seqid, then it sets the
543 * oo_seqid_inuse to true if it currently is not set. If it is set then it
544 * does a cv_wait on the oo_cv_seqid_sync condition variable. When the thread
545 * is done it unsets the oo_seqid_inuse and does a cv_signal to wake a process
546 * waiting on the condition variable.
547 *
548 * 'oo_last_good_seqid' is the last valid seqid this open owner sent OTW,
549 * and 'oo_last_good_op' is the operation that issued the last valid seqid.
550 *
551 * Lock ordering:
552 * mntinfo4_t::mi_lock > oo_lock (for searching mi_oo_list)
553 *
554 * oo_seqid_inuse > mntinfo4_t::mi_lock
555 * oo_seqid_inuse > rnode4_t::r_statelock
556 * oo_seqid_inuse > rnode4_t::r_statev4_lock
557 * oo_seqid_inuse > nfs4_open_stream_t::os_sync_lock
558 *
559 * The 'oo_seqid_inuse'/'oo_cv_seqid_sync' protects:
560 * oo_last_good_op
561 * oo_last_good_seqid
562 * oo_name
563 * oo_seqid
564 *
565 * The 'oo_lock' protects:
566 * oo_cred
567 * oo_cred_otw
568 * oo_foo_node
569 * oo_hash_node
570 * oo_just_created
571 * oo_ref_count
572 * oo_valid
573 */
574
575 typedef struct nfs4_open_owner {
576 cred_t *oo_cred;
577 int oo_ref_count;
578 int oo_valid;
579 int oo_just_created;
580 seqid4 oo_seqid;
581 seqid4 oo_last_good_seqid;
582 nfs4_tag_type_t oo_last_good_op;
583 unsigned oo_seqid_inuse:1;
584 cred_t *oo_cred_otw;
585 kcondvar_t oo_cv_seqid_sync;
586 /*
587 * Fix this to always be 8 bytes
588 */
589 uint64_t oo_name;
590 list_node_t oo_hash_node;
591 list_node_t oo_foo_node;
592 kmutex_t oo_lock;
593 } nfs4_open_owner_t;
594
595 /*
596 * Static server information.
597 * These fields are read-only once they are initialized; sv_lock
598 * should be held as writer if they are changed during mount:
599 * sv_addr
600 * sv_dhsec
601 * sv_hostname
602 * sv_hostnamelen
603 * sv_knconf
604 * sv_next
605 * sv_origknconf
606 *
607 * These fields are protected by sv_lock:
608 * sv_currsec
609 * sv_fhandle
610 * sv_flags
611 * sv_fsid
612 * sv_path
613 * sv_pathlen
614 * sv_pfhandle
615 * sv_save_secinfo
616 * sv_savesec
617 * sv_secdata
618 * sv_secinfo
619 * sv_supp_attrs
620 *
621 * Lock ordering:
622 * nfs_rtable4_lock > sv_lock
623 * rnode4_t::r_statelock > sv_lock
624 */
625 typedef struct servinfo4 {
626 struct knetconfig *sv_knconf; /* bound TLI fd */
627 struct knetconfig *sv_origknconf; /* For RDMA save orig knconf */
628 struct netbuf sv_addr; /* server's address */
629 nfs4_fhandle_t sv_fhandle; /* this server's filehandle */
630 nfs4_fhandle_t sv_pfhandle; /* parent dir filehandle */
631 int sv_pathlen; /* Length of server path */
632 char *sv_path; /* Path name on server */
633 uint32_t sv_flags; /* flags for this server */
634 sec_data_t *sv_secdata; /* client initiated security data */
635 sv_secinfo_t *sv_secinfo; /* server security information */
636 sec_data_t *sv_currsec; /* security data currently used; */
637 /* points to one of the sec_data */
638 /* entries in sv_secinfo */
639 sv_secinfo_t *sv_save_secinfo; /* saved secinfo */
640 sec_data_t *sv_savesec; /* saved security data */
641 sec_data_t *sv_dhsec; /* AUTH_DH data from the user land */
642 char *sv_hostname; /* server's hostname */
643 int sv_hostnamelen; /* server's hostname length */
644 fattr4_fsid sv_fsid; /* fsid of shared obj */
645 fattr4_supported_attrs sv_supp_attrs;
646 struct servinfo4 *sv_next; /* next in list */
647 nfs_rwlock_t sv_lock;
648 } servinfo4_t;
649
650 /* sv_flags fields */
651 #define SV4_TRYSECINFO 0x001 /* try secinfo data from the server */
652 #define SV4_TRYSECDEFAULT 0x002 /* try a default flavor */
653 #define SV4_NOTINUSE 0x004 /* servinfo4_t had fatal errors */
654 #define SV4_ROOT_STALE 0x008 /* root vnode got ESTALE */
655
656 /*
657 * Lock call types. See nfs4frlock().
658 */
659 typedef enum nfs4_lock_call_type {
660 NFS4_LCK_CTYPE_NORM,
661 NFS4_LCK_CTYPE_RECLAIM,
662 NFS4_LCK_CTYPE_RESEND,
663 NFS4_LCK_CTYPE_REINSTATE
664 } nfs4_lock_call_type_t;
665
666 /*
667 * This structure holds the information for a lost open/close/open downgrade/
668 * lock/locku request. It is also used for requests that are queued up so
669 * that the recovery thread can release server state after a forced
670 * unmount.
671 * "lr_op" is 0 if the struct is uninitialized. Otherwise, it is set to
672 * the proper OP_* nfs_opnum4 number. The other fields contain information
673 * to reconstruct the call.
674 *
675 * lr_dvp is used for OPENs with CREATE, so that we can do a PUTFH of the
676 * parent directroy without relying on vtodv (since we may not have a vp
677 * for the file we wish to create).
678 *
679 * lr_putfirst means that the request should go to the front of the resend
680 * queue, rather than the end.
681 */
682 typedef struct nfs4_lost_rqst {
683 list_node_t lr_node;
684 nfs_opnum4 lr_op;
685 vnode_t *lr_vp;
686 vnode_t *lr_dvp;
687 nfs4_open_owner_t *lr_oop;
688 struct nfs4_open_stream *lr_osp;
689 struct nfs4_lock_owner *lr_lop;
690 cred_t *lr_cr;
691 flock64_t *lr_flk;
692 bool_t lr_putfirst;
693 union {
694 struct {
695 nfs4_lock_call_type_t lru_ctype;
696 nfs_lock_type4 lru_locktype;
697 } lru_lockargs; /* LOCK, LOCKU */
698 struct {
699 uint32_t lru_oaccess;
700 uint32_t lru_odeny;
701 enum open_claim_type4 lru_oclaim;
702 stateid4 lru_ostateid; /* reopen only */
703 component4 lru_ofile;
704 } lru_open_args;
705 struct {
706 uint32_t lru_dg_access;
707 uint32_t lru_dg_deny;
708 } lru_open_dg_args;
709 } nfs4_lr_u;
710 } nfs4_lost_rqst_t;
711
712 #define lr_oacc nfs4_lr_u.lru_open_args.lru_oaccess
713 #define lr_odeny nfs4_lr_u.lru_open_args.lru_odeny
714 #define lr_oclaim nfs4_lr_u.lru_open_args.lru_oclaim
715 #define lr_ostateid nfs4_lr_u.lru_open_args.lru_ostateid
716 #define lr_ofile nfs4_lr_u.lru_open_args.lru_ofile
717 #define lr_dg_acc nfs4_lr_u.lru_open_dg_args.lru_dg_access
718 #define lr_dg_deny nfs4_lr_u.lru_open_dg_args.lru_dg_deny
719 #define lr_ctype nfs4_lr_u.lru_lockargs.lru_ctype
720 #define lr_locktype nfs4_lr_u.lru_lockargs.lru_locktype
721
722 /*
723 * Recovery actions. Some actions can imply further recovery using a
724 * different recovery action (e.g., recovering the clientid leads to
725 * recovering open files and locks).
726 */
727
728 typedef enum {
729 NR_UNUSED,
730 NR_CLIENTID,
731 NR_OPENFILES,
732 NR_FHEXPIRED,
733 NR_FAILOVER,
734 NR_WRONGSEC,
735 NR_EXPIRED,
736 NR_BAD_STATEID,
737 NR_BADHANDLE,
738 NR_BAD_SEQID,
739 NR_OLDSTATEID,
740 NR_GRACE,
741 NR_DELAY,
742 NR_LOST_LOCK,
743 NR_LOST_STATE_RQST,
744 NR_STALE,
745 NR_MOVED
746 } nfs4_recov_t;
747
748 /*
749 * Administrative and debug message framework.
750 */
751
752 #define NFS4_MSG_MAX 100
753 extern int nfs4_msg_max;
754
755 #define NFS4_REFERRAL_LOOP_MAX 20
756
757 typedef enum {
758 RE_BAD_SEQID,
759 RE_BADHANDLE,
760 RE_CLIENTID,
761 RE_DEAD_FILE,
762 RE_END,
763 RE_FAIL_RELOCK,
764 RE_FAIL_REMAP_LEN,
765 RE_FAIL_REMAP_OP,
766 RE_FAILOVER,
767 RE_FILE_DIFF,
768 RE_LOST_STATE,
769 RE_OPENS_CHANGED,
770 RE_SIGLOST,
771 RE_SIGLOST_NO_DUMP,
772 RE_START,
773 RE_UNEXPECTED_ACTION,
774 RE_UNEXPECTED_ERRNO,
775 RE_UNEXPECTED_STATUS,
776 RE_WRONGSEC,
777 RE_LOST_STATE_BAD_OP,
778 RE_REFERRAL
779 } nfs4_event_type_t;
780
781 typedef enum {
782 RFS_NO_INSPECT,
783 RFS_INSPECT
784 } nfs4_fact_status_t;
785
786 typedef enum {
787 RF_BADOWNER,
788 RF_ERR,
789 RF_RENEW_EXPIRED,
790 RF_SRV_NOT_RESPOND,
791 RF_SRV_OK,
792 RF_SRVS_NOT_RESPOND,
793 RF_SRVS_OK,
794 RF_DELMAP_CB_ERR,
795 RF_SENDQ_FULL
796 } nfs4_fact_type_t;
797
798 typedef enum {
799 NFS4_MS_DUMP,
800 NFS4_MS_NO_DUMP
801 } nfs4_msg_status_t;
802
803 typedef struct nfs4_rfact {
804 nfs4_fact_type_t rf_type;
805 nfs4_fact_status_t rf_status;
806 bool_t rf_reboot;
807 nfs4_recov_t rf_action;
808 nfs_opnum4 rf_op;
809 nfsstat4 rf_stat4;
810 timespec_t rf_time;
811 int rf_error;
812 struct rnode4 *rf_rp1;
813 char *rf_char1;
814 } nfs4_rfact_t;
815
816 typedef struct nfs4_revent {
817 nfs4_event_type_t re_type;
818 nfsstat4 re_stat4;
819 uint_t re_uint;
820 pid_t re_pid;
821 struct mntinfo4 *re_mi;
822 struct rnode4 *re_rp1;
823 struct rnode4 *re_rp2;
824 char *re_char1;
825 char *re_char2;
826 nfs4_tag_type_t re_tag1;
827 nfs4_tag_type_t re_tag2;
828 seqid4 re_seqid1;
829 seqid4 re_seqid2;
830 } nfs4_revent_t;
831
832 typedef enum {
833 RM_EVENT,
834 RM_FACT
835 } nfs4_msg_type_t;
836
837 typedef struct nfs4_debug_msg {
838 timespec_t msg_time;
839 nfs4_msg_type_t msg_type;
840 char *msg_srv;
841 char *msg_mntpt;
842 union {
843 nfs4_rfact_t msg_fact;
844 nfs4_revent_t msg_event;
845 } rmsg_u;
846 nfs4_msg_status_t msg_status;
847 list_node_t msg_node;
848 } nfs4_debug_msg_t;
849
850 /*
851 * NFS private data per mounted file system
852 * The mi_lock mutex protects the following fields:
853 * mi_flags
854 * mi_in_recovery
855 * mi_recovflags
856 * mi_recovthread
857 * mi_error
858 * mi_printed
859 * mi_down
860 * mi_stsize
861 * mi_curread
862 * mi_curwrite
863 * mi_timers
864 * mi_curr_serv
865 * mi_klmconfig
866 * mi_oo_list
867 * mi_foo_list
868 * mi_foo_num
869 * mi_foo_max
870 * mi_lost_state
871 * mi_bseqid_list
872 * mi_ephemeral
873 * mi_ephemeral_tree
874 *
875 * Normally the netconfig information for the mount comes from
876 * mi_curr_serv and mi_klmconfig is NULL. If NLM calls need to use a
877 * different transport, mi_klmconfig contains the necessary netconfig
878 * information.
879 *
880 * The mi_async_lock mutex protects the following fields:
881 * mi_async_reqs
882 * mi_async_req_count
883 * mi_async_tail
884 * mi_async_curr[NFS4_MAX_ASYNC_QUEUES]
885 * mi_async_clusters
886 * mi_async_init_clusters
887 * mi_threads[NFS4_MAX_ASYNC_QUEUES]
888 * mi_inactive_thread
889 * mi_manager_thread
890 *
891 * The nfs4_server_t::s_lock protects the following fields:
892 * mi_clientid
893 * mi_clientid_next
894 * mi_clientid_prev
895 * mi_open_files
896 *
897 * The mntinfo4_t::mi_recovlock protects the following fields:
898 * mi_srvsettime
899 * mi_srvset_cnt
900 * mi_srv
901 *
902 * Changing mi_srv from one nfs4_server_t to a different one requires
903 * holding the mi_recovlock as RW_WRITER.
904 * Exception: setting mi_srv the first time in mount/mountroot is done
905 * holding the mi_recovlock as RW_READER.
906 *
907 * Locking order:
908 * mi4_globals::mig_lock > mi_async_lock
909 * mi_async_lock > nfs4_server_t::s_lock > mi_lock
910 * mi_recovlock > mi_rename_lock > nfs_rtable4_lock
911 * nfs4_server_t::s_recovlock > mi_recovlock
912 * rnode4_t::r_rwlock > mi_rename_lock
913 * nfs_rtable4_lock > mi_lock
914 * nfs4_server_t::s_lock > mi_msg_list_lock
915 * mi_recovlock > nfs4_server_t::s_lock
916 * mi_recovlock > nfs4_server_lst_lock
917 *
918 * The 'mi_oo_list' represents the hash buckets that contain the
919 * nfs4_open_owenrs for this particular mntinfo4.
920 *
921 * The 'mi_foo_list' represents the freed nfs4_open_owners for this mntinfo4.
922 * 'mi_foo_num' is the current number of freed open owners on the list,
923 * 'mi_foo_max' is the maximum number of freed open owners that are allowable
924 * on the list.
925 *
926 * mi_rootfh and mi_srvparentfh are read-only once created, but that just
927 * refers to the pointer. The contents must be updated to keep in sync
928 * with mi_curr_serv.
929 *
930 * The mi_msg_list_lock protects against adding/deleting entries to the
931 * mi_msg_list, and also the updating/retrieving of mi_lease_period;
932 *
933 * 'mi_zone' is initialized at structure creation time, and never
934 * changes; it may be read without a lock.
935 *
936 * mi_zone_node is linkage into the mi4_globals.mig_list, and is
937 * protected by mi4_globals.mig_list_lock.
938 *
939 * If MI4_EPHEMERAL is set in mi_flags, then mi_ephemeral points to an
940 * ephemeral structure for this ephemeral mount point. It can not be
941 * NULL. Also, mi_ephemeral_tree points to the root of the ephemeral
942 * tree.
943 *
944 * If MI4_EPHEMERAL is not set in mi_flags, then mi_ephemeral has
945 * to be NULL. If mi_ephemeral_tree is non-NULL, then this node
946 * is the enclosing mntinfo4 for the ephemeral tree.
947 */
948 struct zone;
949 struct nfs4_ephemeral;
950 struct nfs4_ephemeral_tree;
951 struct nfs4_server;
952 typedef struct mntinfo4 {
953 kmutex_t mi_lock; /* protects mntinfo4 fields */
954 struct servinfo4 *mi_servers; /* server list */
955 struct servinfo4 *mi_curr_serv; /* current server */
956 struct nfs4_sharedfh *mi_rootfh; /* root filehandle */
957 struct nfs4_sharedfh *mi_srvparentfh; /* root's parent on server */
958 kcondvar_t mi_failover_cv; /* failover synchronization */
959 struct vfs *mi_vfsp; /* back pointer to vfs */
960 enum vtype mi_type; /* file type of the root vnode */
961 uint_t mi_flags; /* see below */
962 uint_t mi_recovflags; /* if recovery active; see below */
963 kthread_t *mi_recovthread; /* active recov thread or NULL */
964 uint_t mi_error; /* only set/valid when MI4_RECOV_FAIL */
965 /* is set in mi_flags */
966 int mi_tsize; /* transfer size (bytes) */
967 /* really read size */
968 int mi_stsize; /* server's max transfer size (bytes) */
969 /* really write size */
970 int mi_timeo; /* inital timeout in 10th sec */
971 int mi_retrans; /* times to retry request */
972 hrtime_t mi_acregmin; /* min time to hold cached file attr */
973 hrtime_t mi_acregmax; /* max time to hold cached file attr */
974 hrtime_t mi_acdirmin; /* min time to hold cached dir attr */
975 hrtime_t mi_acdirmax; /* max time to hold cached dir attr */
976 len_t mi_maxfilesize; /* for pathconf _PC_FILESIZEBITS */
977 int mi_curread; /* current read size */
978 int mi_curwrite; /* current write size */
979 uint_t mi_count; /* ref count */
980 /*
981 * Async I/O management
982 * We have 2 pools of threads working on async I/O:
983 * (1) Threads which work on all async queues. Default number of
984 * threads in this queue is 8. Threads in this pool work on async
985 * queue pointed by mi_async_curr[NFS4_ASYNC_QUEUE]. Number of
986 * active threads in this pool is tracked by
987 * mi_threads[NFS4_ASYNC_QUEUE].
988 * (ii)Threads which work only on page op async queues.
989 * Page ops queue comprises of NFS4_PUTAPAGE, NFS4_PAGEIO &
990 * NFS4_COMMIT. Default number of threads in this queue is 2
991 * (NUM_ASYNC_PGOPS_THREADS). Threads in this pool work on async
992 * queue pointed by mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE]. Number
993 * of active threads in this pool is tracked by
994 * mi_threads[NFS4_ASYNC_PGOPS_QUEUE].
995 *
996 * In addition to above two pools, there is always one thread that
997 * handles over-the-wire requests for VOP_INACTIVE.
998 */
999 struct nfs4_async_reqs *mi_async_reqs[NFS4_ASYNC_TYPES];
1000 struct nfs4_async_reqs *mi_async_tail[NFS4_ASYNC_TYPES];
1001 struct nfs4_async_reqs **mi_async_curr[NFS4_MAX_ASYNC_QUEUES];
1002 /* current async queue */
1003 uint_t mi_async_clusters[NFS4_ASYNC_TYPES];
1004 uint_t mi_async_init_clusters;
1005 uint_t mi_async_req_count; /* # outstanding work requests */
1006 kcondvar_t mi_async_reqs_cv; /* signaled when there's work */
1007 ushort_t mi_threads[NFS4_MAX_ASYNC_QUEUES];
1008 /* number of active async threads */
1009 ushort_t mi_max_threads; /* max number of async threads */
1010 kthread_t *mi_manager_thread; /* async manager thread id */
1011 kthread_t *mi_inactive_thread; /* inactive thread id */
1012 kcondvar_t mi_inact_req_cv; /* notify VOP_INACTIVE thread */
1013 kcondvar_t mi_async_work_cv[NFS4_MAX_ASYNC_QUEUES];
1014 /* tell workers to work */
1015 kcondvar_t mi_async_cv; /* all pool threads exited */
1016 kmutex_t mi_async_lock;
1017 /*
1018 * Other stuff
1019 */
1020 struct pathcnf *mi_pathconf; /* static pathconf kludge */
1021 rpcprog_t mi_prog; /* RPC program number */
1022 rpcvers_t mi_vers; /* RPC program version number */
1023 char **mi_rfsnames; /* mapping to proc names */
1024 kstat_named_t *mi_reqs; /* count of requests */
1025 clock_t mi_printftime; /* last error printf time */
1026 nfs_rwlock_t mi_recovlock; /* separate ops from recovery (v4) */
1027 time_t mi_grace_wait; /* non-zero represents time to wait */
1028 /* when we switched nfs4_server_t - only for observability purposes */
1029 time_t mi_srvsettime;
1030 nfs_rwlock_t mi_rename_lock; /* atomic volfh rename */
1031 struct nfs4_fname *mi_fname; /* root fname */
1032 list_t mi_lost_state; /* resend list */
1033 list_t mi_bseqid_list; /* bad seqid list */
1034 /*
1035 * Client Side Failover stats
1036 */
1037 uint_t mi_noresponse; /* server not responding count */
1038 uint_t mi_failover; /* failover to new server count */
1039 uint_t mi_remap; /* remap to new server count */
1040 /*
1041 * Kstat statistics
1042 */
1043 struct kstat *mi_io_kstats;
1044 struct kstat *mi_ro_kstats;
1045 kstat_t *mi_recov_ksp; /* ptr to the recovery kstat */
1046
1047 /*
1048 * Volatile fh flags (nfsv4)
1049 */
1050 uint32_t mi_fh_expire_type;
1051 /*
1052 * Lease Management
1053 */
1054 struct mntinfo4 *mi_clientid_next;
1055 struct mntinfo4 *mi_clientid_prev;
1056 clientid4 mi_clientid; /* redundant info found in nfs4_server */
1057 int mi_open_files; /* count of open files */
1058 int mi_in_recovery; /* count of recovery instances */
1059 kcondvar_t mi_cv_in_recov; /* cv for recovery threads */
1060 /*
1061 * Open owner stuff.
1062 */
1063 struct nfs4_oo_hash_bucket mi_oo_list[NFS4_NUM_OO_BUCKETS];
1064 list_t mi_foo_list;
1065 int mi_foo_num;
1066 int mi_foo_max;
1067 /*
1068 * Shared filehandle pool.
1069 */
1070 nfs_rwlock_t mi_fh_lock;
1071 avl_tree_t mi_filehandles;
1072
1073 /*
1074 * Debug message queue.
1075 */
1076 list_t mi_msg_list;
1077 int mi_msg_count;
1078 time_t mi_lease_period;
1079 /*
1080 * not guaranteed to be accurate.
1081 * only should be used by debug queue.
1082 */
1083 kmutex_t mi_msg_list_lock;
1084 /*
1085 * Zones support.
1086 */
1087 struct zone *mi_zone; /* Zone in which FS is mounted */
1088 zone_ref_t mi_zone_ref; /* Reference to aforementioned zone */
1089 list_node_t mi_zone_node; /* linkage into per-zone mi list */
1090
1091 /*
1092 * Links for unmounting ephemeral mounts.
1093 */
1094 struct nfs4_ephemeral *mi_ephemeral;
1095 struct nfs4_ephemeral_tree *mi_ephemeral_tree;
1096
1097 uint_t mi_srvset_cnt; /* increment when changing the nfs4_server_t */
1098 struct nfs4_server *mi_srv; /* backpointer to nfs4_server_t */
1099 /*
1100 * Referral related info.
1101 */
1102 int mi_vfs_referral_loop_cnt;
1103 } mntinfo4_t;
1104
1105 /*
1106 * The values for mi_flags.
1107 *
1108 * MI4_HARD hard or soft mount
1109 * MI4_PRINTED responding message printed
1110 * MI4_INT allow INTR on hard mount
1111 * MI4_DOWN server is down
1112 * MI4_NOAC don't cache attributes
1113 * MI4_NOCTO no close-to-open consistency
1114 * MI4_LLOCK local locking only (no lockmgr)
1115 * MI4_GRPID System V group id inheritance
1116 * MI4_SHUTDOWN System is rebooting or shutting down
1117 * MI4_LINK server supports link
1118 * MI4_SYMLINK server supports symlink
1119 * MI4_EPHEMERAL_RECURSED an ephemeral mount being unmounted
1120 * due to a recursive call - no need
1121 * for additional recursion
1122 * MI4_ACL server supports NFSv4 ACLs
1123 * MI4_MIRRORMOUNT is a mirrormount
1124 * MI4_NOPRINT don't print messages
1125 * MI4_DIRECTIO do direct I/O
1126 * MI4_RECOV_ACTIV filesystem has recovery a thread
1127 * MI4_REMOVE_ON_LAST_CLOSE remove from server's list
1128 * MI4_RECOV_FAIL client recovery failed
1129 * MI4_PUBLIC public/url option used
1130 * MI4_MOUNTING mount in progress, don't failover
1131 * MI4_POSIX_LOCK if server is using POSIX locking
1132 * MI4_LOCK_DEBUG cmn_err'd posix lock err msg
1133 * MI4_DEAD zone has released it
1134 * MI4_INACTIVE_IDLE inactive thread idle
1135 * MI4_BADOWNER_DEBUG badowner error msg per mount
1136 * MI4_ASYNC_MGR_STOP tell async manager to die
1137 * MI4_TIMEDOUT saw a timeout during zone shutdown
1138 * MI4_EPHEMERAL is an ephemeral mount
1139 */
1140 #define MI4_HARD 0x1
1141 #define MI4_PRINTED 0x2
1142 #define MI4_INT 0x4
1143 #define MI4_DOWN 0x8
1144 #define MI4_NOAC 0x10
1145 #define MI4_NOCTO 0x20
1146 #define MI4_LLOCK 0x80
1147 #define MI4_GRPID 0x100
1148 #define MI4_SHUTDOWN 0x200
1149 #define MI4_LINK 0x400
1150 #define MI4_SYMLINK 0x800
1151 #define MI4_EPHEMERAL_RECURSED 0x1000
1152 #define MI4_ACL 0x2000
1153 /* MI4_MIRRORMOUNT is also defined in nfsstat.c */
1154 #define MI4_MIRRORMOUNT 0x4000
1155 #define MI4_REFERRAL 0x8000
1156 /* 0x10000 is available */
1157 #define MI4_NOPRINT 0x20000
1158 #define MI4_DIRECTIO 0x40000
1159 /* 0x80000 is available */
1160 #define MI4_RECOV_ACTIV 0x100000
1161 #define MI4_REMOVE_ON_LAST_CLOSE 0x200000
1162 #define MI4_RECOV_FAIL 0x400000
1163 #define MI4_PUBLIC 0x800000
1164 #define MI4_MOUNTING 0x1000000
1165 #define MI4_POSIX_LOCK 0x2000000
1166 #define MI4_LOCK_DEBUG 0x4000000
1167 #define MI4_DEAD 0x8000000
1168 #define MI4_INACTIVE_IDLE 0x10000000
1169 #define MI4_BADOWNER_DEBUG 0x20000000
1170 #define MI4_ASYNC_MGR_STOP 0x40000000
1171 #define MI4_TIMEDOUT 0x80000000
1172
1173 #define MI4_EPHEMERAL (MI4_MIRRORMOUNT | MI4_REFERRAL)
1174
1175 #define INTR4(vp) (VTOMI4(vp)->mi_flags & MI4_INT)
1176
1177 #define FAILOVER_MOUNT4(mi) (mi->mi_servers->sv_next)
1178
1179 /*
1180 * Recovery flags.
1181 *
1182 * MI4R_NEED_CLIENTID is sort of redundant (it's the nfs4_server_t flag
1183 * that's important), but some flag is needed to indicate that recovery is
1184 * going on for the filesystem.
1185 */
1186 #define MI4R_NEED_CLIENTID 0x1
1187 #define MI4R_REOPEN_FILES 0x2
1188 #define MI4R_NEED_SECINFO 0x4
1189 #define MI4R_NEED_NEW_SERVER 0x8
1190 #define MI4R_REMAP_FILES 0x10
1191 #define MI4R_SRV_REBOOT 0x20 /* server has rebooted */
1192 #define MI4R_LOST_STATE 0x40
1193 #define MI4R_BAD_SEQID 0x80
1194 #define MI4R_MOVED 0x100
1195
1196 #define MI4_HOLD(mi) { \
1197 mi_hold(mi); \
1198 }
1199
1200 #define MI4_RELE(mi) { \
1201 mi_rele(mi); \
1202 }
1203
1204 /*
1205 * vfs pointer to mount info
1206 */
1207 #define VFTOMI4(vfsp) ((mntinfo4_t *)((vfsp)->vfs_data))
1208
1209 /*
1210 * vnode pointer to mount info
1211 */
1212 #define VTOMI4(vp) ((mntinfo4_t *)(((vp)->v_vfsp)->vfs_data))
1213
1214 /*
1215 * Lease Management
1216 *
1217 * lease_valid is initially set to NFS4_LEASE_NOT_STARTED. This is when the
1218 * nfs4_server is first created. lease_valid is then set to
1219 * NFS4_LEASE_UNITIALIZED when the renew thread is started. The extra state of
1220 * NFS4_LEASE_NOT_STARTED is needed for client recovery (so we know if a thread
1221 * already exists when we do SETCLIENTID). lease_valid is then set to
1222 * NFS4_LEASE_VALID (if it is at NFS4_LEASE_UNITIALIZED) when a state creating
1223 * operation (OPEN) is done. lease_valid stays at NFS4_LEASE_VALID as long as
1224 * the lease is renewed. It is set to NFS4_LEASE_INVALID when the lease
1225 * expires. Client recovery is needed to set the lease back to
1226 * NFS4_LEASE_VALID from NFS4_LEASE_INVALID.
1227 *
1228 * The s_cred is the credential used to mount the first file system for this
1229 * server. It used as the credential for the renew thread's calls to the
1230 * server.
1231 *
1232 * The renew thread waits on the condition variable cv_thread_exit. If the cv
1233 * is signalled, then the thread knows it must check s_thread_exit to see if
1234 * it should exit. The cv is signaled when the last file system is unmounted
1235 * from a particular server. s_thread_exit is set to 0 upon thread startup,
1236 * and set to NFS4_THREAD_EXIT, when the last file system is unmounted thereby
1237 * telling the thread to exit. s_thread_exit is needed to avoid spurious
1238 * wakeups.
1239 *
1240 * state_ref_count is incremented every time a new file is opened and
1241 * decremented every time a file is closed otw. This keeps track of whether
1242 * the nfs4_server has state associated with it or not.
1243 *
1244 * s_refcnt is the reference count for storage management of the struct
1245 * itself.
1246 *
1247 * mntinfo4_list points to the doubly linked list of mntinfo4s that share
1248 * this nfs4_server (ie: <clientid, saddr> pair) in the current zone. This is
1249 * needed for a nfs4_server to get a mntinfo4 for use in rfs4call.
1250 *
1251 * s_recovlock is used to synchronize recovery operations. The thread
1252 * that is recovering the client must acquire it as a writer. If the
1253 * thread is using the clientid (including recovery operations on other
1254 * state), acquire it as a reader.
1255 *
1256 * The 's_otw_call_count' keeps track of the number of outstanding over the
1257 * wire requests for this structure. The struct will not go away as long
1258 * as this is non-zero (or s_refcnt is non-zero).
1259 *
1260 * The 's_cv_otw_count' is used in conjuntion with the 's_otw_call_count'
1261 * variable to let the renew thread when an outstanding otw request has
1262 * finished.
1263 *
1264 * 'zoneid' and 'zone_globals' are set at creation of this structure
1265 * and are read-only after that; no lock is required to read them.
1266 *
1267 * s_lock protects: everything except cv_thread_exit and s_recovlock.
1268 *
1269 * s_program is used as the index into the nfs4_callback_globals's
1270 * nfs4prog2server table. When a callback request comes in, we can
1271 * use that request's program number (minus NFS4_CALLBACK) as an index
1272 * into the nfs4prog2server. That entry will hold the nfs4_server_t ptr.
1273 * We can then access that nfs4_server_t and its 's_deleg_list' (its list of
1274 * delegated rnode4_ts).
1275 *
1276 * Lock order:
1277 * nfs4_server::s_lock > mntinfo4::mi_lock
1278 * nfs_rtable4_lock > s_lock
1279 * nfs4_server_lst_lock > s_lock
1280 * s_recovlock > s_lock
1281 */
1282 struct nfs4_callback_globals;
1283
1284 typedef struct nfs4_server {
1285 struct nfs4_server *forw;
1286 struct nfs4_server *back;
1287 struct netbuf saddr;
1288 uint_t s_flags; /* see below */
1289 uint_t s_refcnt;
1290 clientid4 clientid; /* what we get from server */
1291 nfs_client_id4 clidtosend; /* what we send to server */
1292 mntinfo4_t *mntinfo4_list;
1293 int lease_valid;
1294 time_t s_lease_time;
1295 time_t last_renewal_time;
1296 timespec_t propagation_delay;
1297 cred_t *s_cred;
1298 kcondvar_t cv_thread_exit;
1299 int s_thread_exit;
1300 int state_ref_count;
1301 int s_otw_call_count;
1302 kcondvar_t s_cv_otw_count;
1303 kcondvar_t s_clientid_pend;
1304 kmutex_t s_lock;
1305 list_t s_deleg_list;
1306 rpcprog_t s_program;
1307 nfs_rwlock_t s_recovlock;
1308 kcondvar_t wait_cb_null; /* used to wait for CB_NULL */
1309 zoneid_t zoneid; /* zone using this nfs4_server_t */
1310 struct nfs4_callback_globals *zone_globals; /* globals */
1311 } nfs4_server_t;
1312
1313 /* nfs4_server flags */
1314 #define N4S_CLIENTID_SET 1 /* server has our clientid */
1315 #define N4S_CLIENTID_PEND 0x2 /* server doesn't have clientid */
1316 #define N4S_CB_PINGED 0x4 /* server has sent us a CB_NULL */
1317 #define N4S_CB_WAITER 0x8 /* is/has wait{ing/ed} for cb_null */
1318 #define N4S_INSERTED 0x10 /* list has reference for server */
1319 #define N4S_BADOWNER_DEBUG 0x20 /* bad owner err msg per client */
1320
1321 #define N4S_CB_PAUSE_TIME 10000 /* Amount of time to pause (10ms) */
1322
1323 struct lease_time_arg {
1324 time_t lease_time;
1325 };
1326
1327 enum nfs4_delegreturn_policy {
1328 IMMEDIATE,
1329 FIRSTCLOSE,
1330 LASTCLOSE,
1331 INACTIVE
1332 };
1333
1334 /*
1335 * Operation hints for the recovery framework (mostly).
1336 *
1337 * EXCEPTIONS:
1338 * OH_ACCESS, OH_GETACL, OH_GETATTR, OH_LOOKUP, OH_READDIR
1339 * These hints exist to allow user visit/readdir a R4SRVSTUB dir.
1340 * (dir represents the root of a server fs that has not yet been
1341 * mounted at client)
1342 */
1343 typedef enum {
1344 OH_OTHER,
1345 OH_READ,
1346 OH_WRITE,
1347 OH_COMMIT,
1348 OH_VFH_RENAME,
1349 OH_MOUNT,
1350 OH_CLOSE,
1351 OH_LOCKU,
1352 OH_DELEGRETURN,
1353 OH_ACCESS,
1354 OH_GETACL,
1355 OH_GETATTR,
1356 OH_LOOKUP,
1357 OH_READDIR
1358 } nfs4_op_hint_t;
1359
1360 /*
1361 * This data structure is used to track ephemeral mounts for both
1362 * mirror mounts and referrals.
1363 *
1364 * Note that each nfs4_ephemeral can only have one other nfs4_ephemeral
1365 * pointing at it. So we don't need two backpointers to walk
1366 * back up the tree.
1367 *
1368 * An ephemeral tree is pointed to by an enclosing non-ephemeral
1369 * mntinfo4. The root is also pointed to by its ephemeral
1370 * mntinfo4. ne_child will get us back to it, while ne_prior
1371 * will get us back to the non-ephemeral mntinfo4. This is an
1372 * edge case we will need to be wary of when walking back up the
1373 * tree.
1374 *
1375 * The way we handle this edge case is to have ne_prior be NULL
1376 * for the root nfs4_ephemeral node.
1377 */
1378 typedef struct nfs4_ephemeral {
1379 mntinfo4_t *ne_mount; /* who encloses us */
1380 struct nfs4_ephemeral *ne_child; /* first child node */
1381 struct nfs4_ephemeral *ne_peer; /* next sibling */
1382 struct nfs4_ephemeral *ne_prior; /* who points at us */
1383 time_t ne_ref_time; /* time last referenced */
1384 uint_t ne_mount_to; /* timeout at */
1385 int ne_state; /* used to traverse */
1386 } nfs4_ephemeral_t;
1387
1388 /*
1389 * State for the node (set in ne_state):
1390 */
1391 #define NFS4_EPHEMERAL_OK 0x0
1392 #define NFS4_EPHEMERAL_VISIT_CHILD 0x1
1393 #define NFS4_EPHEMERAL_VISIT_SIBLING 0x2
1394 #define NFS4_EPHEMERAL_PROCESS_ME 0x4
1395 #define NFS4_EPHEMERAL_CHILD_ERROR 0x8
1396 #define NFS4_EPHEMERAL_PEER_ERROR 0x10
1397
1398 /*
1399 * These are the locks used in processing ephemeral data:
1400 *
1401 * mi->mi_lock
1402 *
1403 * net->net_tree_lock
1404 * This lock is used to gate all tree operations.
1405 * If it is held, then no other process may
1406 * traverse the tree. This allows us to not
1407 * throw a hold on each vfs_t in the tree.
1408 * Can be held for a "long" time.
1409 *
1410 * net->net_cnt_lock
1411 * Used to protect refcnt and status.
1412 * Must be held for a really short time.
1413 *
1414 * nfs4_ephemeral_thread_lock
1415 * Is only held to create the harvester for the zone.
1416 * There is no ordering imposed on it.
1417 * Held for a really short time.
1418 *
1419 * Some further detail on the interactions:
1420 *
1421 * net_tree_lock controls access to net_root. Access needs to first be
1422 * attempted in a non-blocking check.
1423 *
1424 * net_cnt_lock controls access to net_refcnt and net_status. It must only be
1425 * held for very short periods of time, unless the refcnt is 0 and the status
1426 * is INVALID.
1427 *
1428 * Before a caller can grab net_tree_lock, it must first grab net_cnt_lock
1429 * to bump the net_refcnt. It then releases it and does the action specific
1430 * algorithm to get the net_tree_lock. Once it has that, then it is okay to
1431 * grab the net_cnt_lock and change the status. The status can only be
1432 * changed if the caller has the net_tree_lock held as well.
1433 *
1434 * Note that the initial grab of net_cnt_lock must occur whilst
1435 * mi_lock is being held. This prevents stale data in that if the
1436 * ephemeral tree is non-NULL, then the harvester can not remove
1437 * the tree from the mntinfo node until it grabs that lock. I.e.,
1438 * we get the pointer to the tree and hold the lock atomically
1439 * with respect to being in mi_lock.
1440 *
1441 * When a caller is done with net_tree_lock, it can decrement the net_refcnt
1442 * either before it releases net_tree_lock or after.
1443 *
1444 * In either event, to decrement net_refcnt, it must hold net_cnt_lock.
1445 *
1446 * Note that the overall locking scheme for the nodes is to control access
1447 * via the tree. The current scheme could easily be extended such that
1448 * the enclosing root referenced a "forest" of trees. The underlying trees
1449 * would be autonomous with respect to locks.
1450 *
1451 * Note that net_next is controlled by external locks
1452 * particular to the data structure that the tree is being added to.
1453 */
1454 typedef struct nfs4_ephemeral_tree {
1455 mntinfo4_t *net_mount;
1456 nfs4_ephemeral_t *net_root;
1457 struct nfs4_ephemeral_tree *net_next;
1458 kmutex_t net_tree_lock;
1459 kmutex_t net_cnt_lock;
1460 uint_t net_status;
1461 uint_t net_refcnt;
1462 } nfs4_ephemeral_tree_t;
1463
1464 /*
1465 * State for the tree (set in net_status):
1466 */
1467 #define NFS4_EPHEMERAL_TREE_OK 0x0
1468 #define NFS4_EPHEMERAL_TREE_BUILDING 0x1
1469 #define NFS4_EPHEMERAL_TREE_DEROOTING 0x2
1470 #define NFS4_EPHEMERAL_TREE_INVALID 0x4
1471 #define NFS4_EPHEMERAL_TREE_MOUNTING 0x8
1472 #define NFS4_EPHEMERAL_TREE_UMOUNTING 0x10
1473 #define NFS4_EPHEMERAL_TREE_LOCKED 0x20
1474
1475 #define NFS4_EPHEMERAL_TREE_PROCESSING (NFS4_EPHEMERAL_TREE_DEROOTING | \
1476 NFS4_EPHEMERAL_TREE_INVALID | NFS4_EPHEMERAL_TREE_UMOUNTING | \
1477 NFS4_EPHEMERAL_TREE_LOCKED)
1478
1479 /*
1480 * This macro evaluates to non-zero if the given op releases state at the
1481 * server.
1482 */
1483 #define OH_IS_STATE_RELE(op) ((op) == OH_CLOSE || (op) == OH_LOCKU || \
1484 (op) == OH_DELEGRETURN)
1485
1486 #ifdef _KERNEL
1487
1488 extern void nfs4_async_manager(struct vfs *);
1489 extern void nfs4_async_manager_stop(struct vfs *);
1490 extern void nfs4_async_stop(struct vfs *);
1491 extern int nfs4_async_stop_sig(struct vfs *);
1492 extern int nfs4_async_readahead(vnode_t *, u_offset_t, caddr_t,
1493 struct seg *, cred_t *,
1494 void (*)(vnode_t *, u_offset_t,
1495 caddr_t, struct seg *, cred_t *));
1496 extern int nfs4_async_putapage(vnode_t *, page_t *, u_offset_t, size_t,
1497 int, cred_t *, int (*)(vnode_t *, page_t *,
1498 u_offset_t, size_t, int, cred_t *));
1499 extern int nfs4_async_pageio(vnode_t *, page_t *, u_offset_t, size_t,
1500 int, cred_t *, int (*)(vnode_t *, page_t *,
1501 u_offset_t, size_t, int, cred_t *));
1502 extern void nfs4_async_commit(vnode_t *, page_t *, offset3, count3,
1503 cred_t *, void (*)(vnode_t *, page_t *,
1504 offset3, count3, cred_t *));
1505 extern void nfs4_async_inactive(vnode_t *, cred_t *);
1506 extern void nfs4_inactive_thread(mntinfo4_t *mi);
1507 extern void nfs4_inactive_otw(vnode_t *, cred_t *);
1508 extern int nfs4_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *);
1509
1510 extern int nfs4_setopts(vnode_t *, model_t, struct nfs_args *);
1511 extern void nfs4_mnt_kstat_init(struct vfs *);
1512
1513 extern void rfs4call(struct mntinfo4 *, struct COMPOUND4args_clnt *,
1514 struct COMPOUND4res_clnt *, cred_t *, int *, int,
1515 nfs4_error_t *);
1516 extern void nfs4_acl_fill_cache(struct rnode4 *, vsecattr_t *);
1517 extern int nfs4_attr_otw(vnode_t *, nfs4_tag_type_t,
1518 nfs4_ga_res_t *, bitmap4, cred_t *);
1519
1520 extern void nfs4_attrcache_noinval(vnode_t *, nfs4_ga_res_t *, hrtime_t);
1521 extern void nfs4_attr_cache(vnode_t *, nfs4_ga_res_t *,
1522 hrtime_t, cred_t *, int,
1523 change_info4 *);
1524 extern void nfs4_purge_rddir_cache(vnode_t *);
1525 extern void nfs4_invalidate_pages(vnode_t *, u_offset_t, cred_t *);
1526 extern void nfs4_purge_caches(vnode_t *, int, cred_t *, int);
1527 extern void nfs4_purge_stale_fh(int, vnode_t *, cred_t *);
1528 extern void nfs4_flush_pages(vnode_t *vp, cred_t *cr);
1529
1530 extern void nfs4rename_update(vnode_t *, vnode_t *, nfs_fh4 *, char *);
1531 extern void nfs4_update_paths(vnode_t *, char *, vnode_t *, char *,
1532 vnode_t *);
1533
1534 extern void nfs4args_lookup_free(nfs_argop4 *, int);
1535 extern void nfs4args_copen_free(OPEN4cargs *);
1536
1537 extern void nfs4_printfhandle(nfs4_fhandle_t *);
1538
1539 extern void nfs_free_mi4(mntinfo4_t *);
1540 extern void sv4_free(servinfo4_t *);
1541 extern void nfs4_mi_zonelist_add(mntinfo4_t *);
1542 extern int nfs4_mi_zonelist_remove(mntinfo4_t *);
1543 extern int nfs4_secinfo_recov(mntinfo4_t *, vnode_t *, vnode_t *);
1544 extern void nfs4_secinfo_init(void);
1545 extern void nfs4_secinfo_fini(void);
1546 extern int nfs4_secinfo_path(mntinfo4_t *, cred_t *, int);
1547 extern int nfs4_secinfo_vnode_otw(vnode_t *, char *, cred_t *);
1548 extern void secinfo_free(sv_secinfo_t *);
1549 extern void save_mnt_secinfo(servinfo4_t *);
1550 extern void check_mnt_secinfo(servinfo4_t *, vnode_t *);
1551 extern int vattr_to_fattr4(vattr_t *, vsecattr_t *, fattr4 *, int,
1552 enum nfs_opnum4, bitmap4 supp_mask);
1553 extern int nfs4_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
1554 int, cred_t *);
1555 extern void nfs4_write_error(vnode_t *, int, cred_t *);
1556 extern void nfs4_lockcompletion(vnode_t *, int);
1557 extern bool_t nfs4_map_lost_lock_conflict(vnode_t *);
1558 extern int vtodv(vnode_t *, vnode_t **, cred_t *, bool_t);
1559 extern int vtoname(vnode_t *, char *, ssize_t);
1560 extern void nfs4open_confirm(vnode_t *, seqid4*, stateid4 *, cred_t *,
1561 bool_t, bool_t *, nfs4_open_owner_t *, bool_t,
1562 nfs4_error_t *, int *);
1563 extern void nfs4_error_zinit(nfs4_error_t *);
1564 extern void nfs4_error_init(nfs4_error_t *, int);
1565 extern void nfs4_free_args(struct nfs_args *);
1566
1567 extern void mi_hold(mntinfo4_t *);
1568 extern void mi_rele(mntinfo4_t *);
1569
1570 extern vnode_t *find_referral_stubvp(vnode_t *, char *, cred_t *);
1571 extern int nfs4_setup_referral(vnode_t *, char *, vnode_t **, cred_t *);
1572
1573 extern sec_data_t *copy_sec_data(sec_data_t *);
1574 extern gss_clntdata_t *copy_sec_data_gss(gss_clntdata_t *);
1575
1576 #ifdef DEBUG
1577 extern int nfs4_consistent_type(vnode_t *);
1578 #endif
1579
1580 extern void nfs4_init_dot_entries(void);
1581 extern void nfs4_destroy_dot_entries(void);
1582 extern struct nfs4_callback_globals *nfs4_get_callback_globals(void);
1583
1584 extern struct nfs4_server nfs4_server_lst;
1585
1586 extern volatile clock_t nfs_write_error_interval;
1587
1588 #endif /* _KERNEL */
1589
1590 /*
1591 * Flags for nfs4getfh_otw.
1592 */
1593
1594 #define NFS4_GETFH_PUBLIC 0x01
1595 #define NFS4_GETFH_NEEDSOP 0x02
1596
1597 /*
1598 * Found through rnodes.
1599 *
1600 * The os_open_ref_count keeps track the number of open file descriptor
1601 * references on this data structure. It will be bumped for any successful
1602 * OTW OPEN call and any OPEN call that determines the OTW call is not
1603 * necessary and the open stream hasn't just been created (see
1604 * nfs4_is_otw_open_necessary).
1605 *
1606 * os_mapcnt is a count of the number of mmapped pages for a particular
1607 * open stream; this in conjunction w/ os_open_ref_count is used to
1608 * determine when to do a close to the server. This is necessary because
1609 * of the semantics of doing open, mmap, close; the OTW close must be wait
1610 * until all open and mmap references have vanished.
1611 *
1612 * 'os_valid' tells us whether this structure is about to be freed or not,
1613 * if it is then don't return it in find_open_stream().
1614 *
1615 * 'os_final_close' is set when a CLOSE OTW was attempted. This is needed
1616 * so we can properly count the os_open_ref_count in cases where we VOP_CLOSE
1617 * without a VOP_OPEN, and have nfs4_inactive() drive the OTW CLOSE. It
1618 * also helps differentiate the VOP_OPEN/VN_RELE case from the VOP_CLOSE
1619 * that tried to close OTW but failed, and left the state cleanup to
1620 * nfs4_inactive/CLOSE_FORCE.
1621 *
1622 * 'os_force_close' is used to let us know if an intervening thread came
1623 * and reopened the open stream after we decided to issue a CLOSE_FORCE,
1624 * but before we could actually process the CLOSE_FORCE.
1625 *
1626 * 'os_pending_close' is set when an over-the-wire CLOSE is deferred to the
1627 * lost state queue.
1628 *
1629 * 'open_stateid' is set to the last open stateid returned by the server unless
1630 * 'os_delegation' is 1, in which case 'open_stateid' refers to the
1631 * delegation stateid returned by the server. This is used in cases where the
1632 * client tries to OPEN a file but already has a suitable delegation, so we
1633 * just stick the delegation stateid in the open stream.
1634 *
1635 * os_dc_openacc are open access bits which have been granted to the
1636 * open stream by virtue of a delegation, but which have not been seen
1637 * by the server. This applies even if the open stream does not have
1638 * os_delegation set. These bits are used when setting file locks to
1639 * determine whether an open with CLAIM_DELEGATE_CUR needs to be done
1640 * before the lock request can be sent to the server. See
1641 * nfs4frlock_check_deleg().
1642 *
1643 * 'os_mmap_read/write' keep track of the read and write access our memory
1644 * maps require. We need to keep track of this so we can provide the proper
1645 * access bits in the open/mmap/close/reboot/reopen case.
1646 *
1647 * 'os_failed_reopen' tells us that we failed to successfully reopen this
1648 * open stream; therefore, we should not use this open stateid as it is
1649 * not valid anymore. This flag is also used to indicate an unsuccessful
1650 * attempt to reopen a delegation open stream with CLAIM_DELEGATE_CUR.
1651 *
1652 * If 'os_orig_oo_name' is different than os_open_owner's oo_name
1653 * then this tells us that this open stream's open owner used a
1654 * bad seqid (that is, got NFS4ERR_BAD_SEQID). If different, this open
1655 * stream will no longer be used for future OTW state releasing calls.
1656 *
1657 * Lock ordering:
1658 * rnode4_t::r_os_lock > os_sync_lock
1659 * os_sync_lock > rnode4_t::r_statelock
1660 * os_sync_lock > rnode4_t::r_statev4_lock
1661 * os_sync_lock > mntinfo4_t::mi_lock (via hold over rfs4call)
1662 *
1663 * The 'os_sync_lock' protects:
1664 * open_stateid
1665 * os_dc_openacc
1666 * os_delegation
1667 * os_failed_reopen
1668 * os_final_close
1669 * os_force_close
1670 * os_mapcnt
1671 * os_mmap_read
1672 * os_mmap_write
1673 * os_open_ref_count
1674 * os_pending_close
1675 * os_share_acc_read
1676 * os_share_acc_write
1677 * os_share_deny_none
1678 * os_share_deny_read
1679 * os_share_deny_write
1680 * os_ref_count
1681 * os_valid
1682 *
1683 * The rnode4_t::r_os_lock protects:
1684 * os_node
1685 *
1686 * These fields are set at creation time and
1687 * read only after that:
1688 * os_open_owner
1689 * os_orig_oo_name
1690 */
1691 typedef struct nfs4_open_stream {
1692 uint64_t os_share_acc_read;
1693 uint64_t os_share_acc_write;
1694 uint64_t os_mmap_read;
1695 uint64_t os_mmap_write;
1696 uint32_t os_share_deny_none;
1697 uint32_t os_share_deny_read;
1698 uint32_t os_share_deny_write;
1699 stateid4 open_stateid;
1700 int os_dc_openacc;
1701 int os_ref_count;
1702 unsigned os_valid:1;
1703 unsigned os_delegation:1;
1704 unsigned os_final_close:1;
1705 unsigned os_pending_close:1;
1706 unsigned os_failed_reopen:1;
1707 unsigned os_force_close:1;
1708 int os_open_ref_count;
1709 long os_mapcnt;
1710 list_node_t os_node;
1711 struct nfs4_open_owner *os_open_owner;
1712 uint64_t os_orig_oo_name;
1713 kmutex_t os_sync_lock;
1714 } nfs4_open_stream_t;
1715
1716 /*
1717 * This structure describes the format of the lock_owner_name
1718 * field of the lock owner.
1719 */
1720
1721 typedef struct nfs4_lo_name {
1722 uint64_t ln_seq_num;
1723 pid_t ln_pid;
1724 } nfs4_lo_name_t;
1725
1726 /*
1727 * Flags for lo_flags.
1728 */
1729 #define NFS4_LOCK_SEQID_INUSE 0x1
1730 #define NFS4_BAD_SEQID_LOCK 0x2
1731
1732 /*
1733 * The lo_prev_rnode and lo_next_rnode are for a circular list that hangs
1734 * off the rnode. If the links are NULL it means this object is not on the
1735 * list.
1736 *
1737 * 'lo_pending_rqsts' is non-zero if we ever tried to send a request and
1738 * didn't get a response back. This is used to figure out if we have
1739 * possible remote v4 locks, so that we can clean up at process exit. In
1740 * theory, the client should be able to figure out if the server received
1741 * the request (based on what seqid works), so maybe we can get rid of this
1742 * flag someday.
1743 *
1744 * 'lo_ref_count' tells us how many processes/threads are using this data
1745 * structure. The rnode's list accounts for one reference.
1746 *
1747 * 'lo_just_created' is set to NFS4_JUST_CREATED when we first create the
1748 * data structure. It is then set to NFS4_PERM_CREATED when a lock request
1749 * is successful using this lock owner structure. We need to keep 'temporary'
1750 * lock owners around so we can properly keep the lock seqid synchronization
1751 * when multiple processes/threads are trying to create the lock owner for the
1752 * first time (especially with the DENIED error case). Once
1753 * 'lo_just_created' is set to NFS4_PERM_CREATED, it doesn't change.
1754 *
1755 * 'lo_valid' tells us whether this structure is about to be freed or not,
1756 * if it is then don't return it from find_lock_owner().
1757 *
1758 * Retrieving and setting of 'lock_seqid' is protected by the
1759 * NFS4_LOCK_SEQID_INUSE flag. Waiters for NFS4_LOCK_SEQID_INUSE should
1760 * use 'lo_cv_seqid_sync'.
1761 *
1762 * The setting of 'lock_stateid' is protected by the
1763 * NFS4_LOCK_SEQID_INUSE flag and 'lo_lock'. The retrieving of the
1764 * 'lock_stateid' is protected by 'lo_lock', with the additional
1765 * requirement that the calling function can handle NFS4ERR_OLD_STATEID and
1766 * NFS4ERR_BAD_STATEID as appropiate.
1767 *
1768 * The setting of NFS4_BAD_SEQID_LOCK to lo_flags tells us whether this lock
1769 * owner used a bad seqid (that is, got NFS4ERR_BAD_SEQID). With this set,
1770 * this lock owner will no longer be used for future OTW calls. Once set,
1771 * it is never unset.
1772 *
1773 * Lock ordering:
1774 * rnode4_t::r_statev4_lock > lo_lock
1775 */
1776 typedef struct nfs4_lock_owner {
1777 struct nfs4_lock_owner *lo_next_rnode;
1778 struct nfs4_lock_owner *lo_prev_rnode;
1779 int lo_pid;
1780 stateid4 lock_stateid;
1781 seqid4 lock_seqid;
1782 /*
1783 * Fix this to always be 12 bytes
1784 */
1785 nfs4_lo_name_t lock_owner_name;
1786 int lo_ref_count;
1787 int lo_valid;
1788 int lo_pending_rqsts;
1789 int lo_just_created;
1790 int lo_flags;
1791 kcondvar_t lo_cv_seqid_sync;
1792 kmutex_t lo_lock;
1793 kthread_t *lo_seqid_holder; /* debugging aid */
1794 } nfs4_lock_owner_t;
1795
1796 /* for nfs4_lock_owner_t lookups */
1797 typedef enum {LOWN_ANY, LOWN_VALID_STATEID} lown_which_t;
1798
1799 /* Number of times to retry a call that fails with state independent error */
1800 #define NFS4_NUM_RECOV_RETRIES 3
1801
1802 typedef enum {
1803 NO_SID,
1804 DEL_SID,
1805 LOCK_SID,
1806 OPEN_SID,
1807 SPEC_SID
1808 } nfs4_stateid_type_t;
1809
1810 typedef struct nfs4_stateid_types {
1811 stateid4 d_sid;
1812 stateid4 l_sid;
1813 stateid4 o_sid;
1814 nfs4_stateid_type_t cur_sid_type;
1815 } nfs4_stateid_types_t;
1816
1817 /*
1818 * Per-zone data for dealing with callbacks. Included here solely for the
1819 * benefit of MDB.
1820 */
1821 struct nfs4_callback_stats {
1822 kstat_named_t delegations;
1823 kstat_named_t cb_getattr;
1824 kstat_named_t cb_recall;
1825 kstat_named_t cb_null;
1826 kstat_named_t cb_dispatch;
1827 kstat_named_t delegaccept_r;
1828 kstat_named_t delegaccept_rw;
1829 kstat_named_t delegreturn;
1830 kstat_named_t callbacks;
1831 kstat_named_t claim_cur;
1832 kstat_named_t claim_cur_ok;
1833 kstat_named_t recall_trunc;
1834 kstat_named_t recall_failed;
1835 kstat_named_t return_limit_write;
1836 kstat_named_t return_limit_addmap;
1837 kstat_named_t deleg_recover;
1838 kstat_named_t cb_illegal;
1839 };
1840
1841 struct nfs4_callback_globals {
1842 kmutex_t nfs4_cb_lock;
1843 kmutex_t nfs4_dlist_lock;
1844 int nfs4_program_hint;
1845 /* this table maps the program number to the nfs4_server structure */
1846 struct nfs4_server **nfs4prog2server;
1847 list_t nfs4_dlist;
1848 list_t nfs4_cb_ports;
1849 struct nfs4_callback_stats nfs4_callback_stats;
1850 #ifdef DEBUG
1851 int nfs4_dlistadd_c;
1852 int nfs4_dlistclean_c;
1853 #endif
1854 };
1855
1856 typedef enum {
1857 CLOSE_NORM,
1858 CLOSE_DELMAP,
1859 CLOSE_FORCE,
1860 CLOSE_RESEND,
1861 CLOSE_AFTER_RESEND
1862 } nfs4_close_type_t;
1863
1864 /*
1865 * Structure to hold the bad seqid information that is passed
1866 * to the recovery framework.
1867 */
1868 typedef struct nfs4_bseqid_entry {
1869 nfs4_open_owner_t *bs_oop;
1870 nfs4_lock_owner_t *bs_lop;
1871 vnode_t *bs_vp;
1872 pid_t bs_pid;
1873 nfs4_tag_type_t bs_tag;
1874 seqid4 bs_seqid;
1875 list_node_t bs_node;
1876 } nfs4_bseqid_entry_t;
1877
1878 #ifdef _KERNEL
1879
1880 extern void nfs4close_one(vnode_t *, nfs4_open_stream_t *, cred_t *, int,
1881 nfs4_lost_rqst_t *, nfs4_error_t *, nfs4_close_type_t,
1882 size_t, uint_t, uint_t);
1883 extern void nfs4close_notw(vnode_t *, nfs4_open_stream_t *, int *);
1884 extern void nfs4_set_lock_stateid(nfs4_lock_owner_t *, stateid4);
1885 extern void open_owner_hold(nfs4_open_owner_t *);
1886 extern void open_owner_rele(nfs4_open_owner_t *);
1887 extern nfs4_open_stream_t *find_or_create_open_stream(nfs4_open_owner_t *,
1888 struct rnode4 *, int *);
1889 extern nfs4_open_stream_t *find_open_stream(nfs4_open_owner_t *,
1890 struct rnode4 *);
1891 extern nfs4_open_stream_t *create_open_stream(nfs4_open_owner_t *oop,
1892 struct rnode4 *rp);
1893 extern void open_stream_hold(nfs4_open_stream_t *);
1894 extern void open_stream_rele(nfs4_open_stream_t *, struct rnode4 *);
1895 extern int nfs4close_all(vnode_t *, cred_t *);
1896 extern void lock_owner_hold(nfs4_lock_owner_t *);
1897 extern void lock_owner_rele(nfs4_lock_owner_t *);
1898 extern nfs4_lock_owner_t *create_lock_owner(struct rnode4 *, pid_t);
1899 extern nfs4_lock_owner_t *find_lock_owner(struct rnode4 *, pid_t, lown_which_t);
1900 extern void nfs4_rnode_remove_lock_owner(struct rnode4 *,
1901 nfs4_lock_owner_t *);
1902 extern void nfs4_flush_lock_owners(struct rnode4 *);
1903 extern void nfs4_setlockowner_args(lock_owner4 *, struct rnode4 *, pid_t);
1904 extern void nfs4_set_open_seqid(seqid4, nfs4_open_owner_t *,
1905 nfs4_tag_type_t);
1906 extern void nfs4_set_lock_seqid(seqid4, nfs4_lock_owner_t *);
1907 extern void nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *,
1908 nfs4_tag_type_t);
1909 extern void nfs4_end_open_seqid_sync(nfs4_open_owner_t *);
1910 extern int nfs4_start_open_seqid_sync(nfs4_open_owner_t *, mntinfo4_t *);
1911 extern void nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *);
1912 extern int nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *, mntinfo4_t *);
1913 extern void nfs4_setup_lock_args(nfs4_lock_owner_t *, nfs4_open_owner_t *,
1914 nfs4_open_stream_t *, clientid4, locker4 *);
1915 extern void nfs4_destroy_open_owner(nfs4_open_owner_t *);
1916
1917 extern void nfs4_renew_lease_thread(nfs4_server_t *);
1918 extern nfs4_server_t *find_nfs4_server(mntinfo4_t *);
1919 extern nfs4_server_t *find_nfs4_server_all(mntinfo4_t *, int all);
1920 extern nfs4_server_t *new_nfs4_server(servinfo4_t *, cred_t *);
1921 extern void nfs4_mark_srv_dead(nfs4_server_t *);
1922 extern nfs4_server_t *servinfo4_to_nfs4_server(servinfo4_t *);
1923 extern void nfs4_inc_state_ref_count(mntinfo4_t *);
1924 extern void nfs4_inc_state_ref_count_nolock(nfs4_server_t *,
1925 mntinfo4_t *);
1926 extern void nfs4_dec_state_ref_count(mntinfo4_t *);
1927 extern void nfs4_dec_state_ref_count_nolock(nfs4_server_t *,
1928 mntinfo4_t *);
1929 extern clientid4 mi2clientid(mntinfo4_t *);
1930 extern int nfs4_server_in_recovery(nfs4_server_t *);
1931 extern bool_t nfs4_server_vlock(nfs4_server_t *, int);
1932 extern nfs4_open_owner_t *create_open_owner(cred_t *, mntinfo4_t *);
1933 extern uint64_t nfs4_get_new_oo_name(void);
1934 extern nfs4_open_owner_t *find_open_owner(cred_t *, int, mntinfo4_t *);
1935 extern nfs4_open_owner_t *find_open_owner_nolock(cred_t *, int, mntinfo4_t *);
1936 extern void nfs4frlock(nfs4_lock_call_type_t, vnode_t *, int, flock64_t *,
1937 int, u_offset_t, cred_t *, nfs4_error_t *,
1938 nfs4_lost_rqst_t *, int *);
1939 extern void nfs4open_dg_save_lost_rqst(int, nfs4_lost_rqst_t *,
1940 nfs4_open_owner_t *, nfs4_open_stream_t *, cred_t *,
1941 vnode_t *, int, int);
1942 extern void nfs4_open_downgrade(int, int, nfs4_open_owner_t *,
1943 nfs4_open_stream_t *, vnode_t *, cred_t *,
1944 nfs4_lost_rqst_t *, nfs4_error_t *, cred_t **, seqid4 *);
1945 extern seqid4 nfs4_get_open_seqid(nfs4_open_owner_t *);
1946 extern cred_t *nfs4_get_otw_cred(cred_t *, mntinfo4_t *, nfs4_open_owner_t *);
1947 extern void nfs4_init_stateid_types(nfs4_stateid_types_t *);
1948 extern void nfs4_save_stateid(stateid4 *, nfs4_stateid_types_t *);
1949
1950 extern kmutex_t nfs4_server_lst_lock;
1951
1952 extern void nfs4callback_destroy(nfs4_server_t *);
1953 extern void nfs4_callback_init(void);
1954 extern void nfs4_callback_fini(void);
1955 extern void nfs4_cb_args(nfs4_server_t *, struct knetconfig *,
1956 SETCLIENTID4args *);
1957 extern void nfs4delegreturn_async(struct rnode4 *, int, bool_t);
1958
1959 extern enum nfs4_delegreturn_policy nfs4_delegreturn_policy;
1960
1961 extern void nfs4_add_mi_to_server(nfs4_server_t *, mntinfo4_t *);
1962 extern void nfs4_remove_mi_from_server(mntinfo4_t *, nfs4_server_t *);
1963 extern nfs4_server_t *nfs4_move_mi(mntinfo4_t *, servinfo4_t *, servinfo4_t *);
1964 extern bool_t nfs4_fs_active(nfs4_server_t *);
1965 extern void nfs4_server_rele(nfs4_server_t *);
1966 extern bool_t inlease(nfs4_server_t *);
1967 extern bool_t nfs4_has_pages(vnode_t *);
1968 extern void nfs4_log_badowner(mntinfo4_t *, nfs_opnum4);
1969
1970 #endif /* _KERNEL */
1971
1972 /*
1973 * Client State Recovery
1974 */
1975
1976 /*
1977 * The following defines are used for rs_flags in
1978 * a nfs4_recov_state_t structure.
1979 *
1980 * NFS4_RS_RENAME_HELD Indicates that the mi_rename_lock was held.
1981 * NFS4_RS_GRACE_MSG Set once we have uprintf'ed a grace message.
1982 * NFS4_RS_DELAY_MSG Set once we have uprintf'ed a delay message.
1983 * NFS4_RS_RECALL_HELD1 r_deleg_recall_lock for vp1 was held.
1984 * NFS4_RS_RECALL_HELD2 r_deleg_recall_lock for vp2 was held.
1985 */
1986 #define NFS4_RS_RENAME_HELD 0x000000001
1987 #define NFS4_RS_GRACE_MSG 0x000000002
1988 #define NFS4_RS_DELAY_MSG 0x000000004
1989 #define NFS4_RS_RECALL_HELD1 0x000000008
1990 #define NFS4_RS_RECALL_HELD2 0x000000010
1991
1992 /*
1993 * Information that is retrieved from nfs4_start_op() and that is
1994 * passed into nfs4_end_op().
1995 *
1996 * rs_sp is a reference to the nfs4_server that was found, or NULL.
1997 *
1998 * rs_num_retry_despite_err is the number times client retried an
1999 * OTW op despite a recovery error. It is only incremented for hints
2000 * exempt to normal R4RECOVERR processing
2001 * (OH_CLOSE/OH_LOCKU/OH_DELEGRETURN). (XXX this special-case code
2002 * needs review for possible removal.)
2003 * It is initialized wherever nfs4_recov_state_t is declared -- usually
2004 * very near initialization of rs_flags.
2005 */
2006 typedef struct {
2007 nfs4_server_t *rs_sp;
2008 int rs_flags;
2009 int rs_num_retry_despite_err;
2010 } nfs4_recov_state_t;
2011
2012 /*
2013 * Flags for nfs4_check_remap, nfs4_remap_file and nfs4_remap_root.
2014 */
2015
2016 #define NFS4_REMAP_CKATTRS 1
2017 #define NFS4_REMAP_NEEDSOP 2
2018
2019 #ifdef _KERNEL
2020
2021 extern int nfs4_is_otw_open_necessary(nfs4_open_owner_t *, int,
2022 vnode_t *, int, int *, int, nfs4_recov_state_t *);
2023 extern void nfs4setclientid(struct mntinfo4 *, struct cred *, bool_t,
2024 nfs4_error_t *);
2025 extern void nfs4_reopen(vnode_t *, nfs4_open_stream_t *, nfs4_error_t *,
2026 open_claim_type4, bool_t, bool_t);
2027 extern void nfs4_remap_root(struct mntinfo4 *, nfs4_error_t *, int);
2028 extern void nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int,
2029 nfs4_error_t *);
2030 extern void nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int,
2031 nfs4_error_t *);
2032 extern int nfs4_make_dotdot(struct nfs4_sharedfh *, hrtime_t,
2033 vnode_t *, cred_t *, vnode_t **, int);
2034 extern void nfs4_fail_recov(vnode_t *, char *, int, nfsstat4);
2035
2036 extern int nfs4_needs_recovery(nfs4_error_t *, bool_t, vfs_t *);
2037 extern int nfs4_recov_marks_dead(nfsstat4);
2038 extern bool_t nfs4_start_recovery(nfs4_error_t *, struct mntinfo4 *,
2039 vnode_t *, vnode_t *, stateid4 *,
2040 nfs4_lost_rqst_t *, nfs_opnum4, nfs4_bseqid_entry_t *,
2041 vnode_t *, char *);
2042 extern int nfs4_start_op(struct mntinfo4 *, vnode_t *, vnode_t *,
2043 nfs4_recov_state_t *);
2044 extern void nfs4_end_op(struct mntinfo4 *, vnode_t *, vnode_t *,
2045 nfs4_recov_state_t *, bool_t);
2046 extern int nfs4_start_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
2047 nfs4_op_hint_t, nfs4_recov_state_t *, bool_t *);
2048 extern void nfs4_end_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
2049 nfs4_op_hint_t, nfs4_recov_state_t *, bool_t);
2050 extern char *nfs4_recov_action_to_str(nfs4_recov_t);
2051
2052 /*
2053 * In sequence, code desiring to unmount an ephemeral tree must
2054 * call nfs4_ephemeral_umount, nfs4_ephemeral_umount_activate,
2055 * and nfs4_ephemeral_umount_unlock. The _unlock must also be
2056 * called on all error paths that occur before it would naturally
2057 * be invoked.
2058 *
2059 * The caller must also provde a pointer to a boolean to keep track
2060 * of whether or not the code in _unlock is to be ran.
2061 */
2062 extern void nfs4_ephemeral_umount_activate(mntinfo4_t *,
2063 bool_t *, nfs4_ephemeral_tree_t **);
2064 extern int nfs4_ephemeral_umount(mntinfo4_t *, int, cred_t *,
2065 bool_t *, nfs4_ephemeral_tree_t **);
2066 extern void nfs4_ephemeral_umount_unlock(bool_t *,
2067 nfs4_ephemeral_tree_t **);
2068
2069 extern int nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp);
2070
2071 extern int nfs4_callmapid(utf8string *, struct nfs_fsl_info *);
2072 extern int nfs4_fetch_locations(mntinfo4_t *, struct nfs4_sharedfh *,
2073 char *, cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, bool_t);
2074
2075 extern int wait_for_recall(vnode_t *, vnode_t *, nfs4_op_hint_t,
2076 nfs4_recov_state_t *);
2077 extern void nfs4_end_op_recall(vnode_t *, vnode_t *, nfs4_recov_state_t *);
2078 extern void nfs4_send_siglost(pid_t, mntinfo4_t *mi, vnode_t *vp, bool_t,
2079 int, nfsstat4);
2080 extern time_t nfs4err_delay_time;
2081 extern void nfs4_set_grace_wait(mntinfo4_t *);
2082 extern void nfs4_set_delay_wait(vnode_t *);
2083 extern int nfs4_wait_for_grace(mntinfo4_t *, nfs4_recov_state_t *);
2084 extern int nfs4_wait_for_delay(vnode_t *, nfs4_recov_state_t *);
2085 extern nfs4_bseqid_entry_t *nfs4_create_bseqid_entry(nfs4_open_owner_t *,
2086 nfs4_lock_owner_t *, vnode_t *, pid_t, nfs4_tag_type_t,
2087 seqid4);
2088
2089 extern void nfs4_resend_open_otw(vnode_t **, nfs4_lost_rqst_t *,
2090 nfs4_error_t *);
2091 extern void nfs4_resend_delegreturn(nfs4_lost_rqst_t *, nfs4_error_t *,
2092 nfs4_server_t *);
2093 extern int nfs4_rpc_retry_error(int);
2094 extern int nfs4_try_failover(nfs4_error_t *);
2095 extern void nfs4_free_msg(nfs4_debug_msg_t *);
2096 extern void nfs4_mnt_recov_kstat_init(vfs_t *);
2097 extern void nfs4_mi_kstat_inc_delay(mntinfo4_t *);
2098 extern void nfs4_mi_kstat_inc_no_grace(mntinfo4_t *);
2099 extern char *nfs4_stat_to_str(nfsstat4);
2100 extern char *nfs4_op_to_str(nfs_opnum4);
2101
2102 extern void nfs4_queue_event(nfs4_event_type_t, mntinfo4_t *, char *,
2103 uint_t, vnode_t *, vnode_t *, nfsstat4, char *, pid_t,
2104 nfs4_tag_type_t, nfs4_tag_type_t, seqid4, seqid4);
2105 extern void nfs4_queue_fact(nfs4_fact_type_t, mntinfo4_t *, nfsstat4,
2106 nfs4_recov_t, nfs_opnum4, bool_t, char *, int, vnode_t *);
2107 #pragma rarely_called(nfs4_queue_event)
2108 #pragma rarely_called(nfs4_queue_fact)
2109
2110 /* Used for preformed "." and ".." dirents */
2111 extern char *nfs4_dot_entries;
2112 extern char *nfs4_dot_dot_entry;
2113
2114 #ifdef DEBUG
2115 extern uint_t nfs4_tsd_key;
2116 #endif
2117
2118 #endif /* _KERNEL */
2119
2120 /*
2121 * Filehandle management.
2122 *
2123 * Filehandles can change in v4, so rather than storing the filehandle
2124 * directly in the rnode, etc., we manage the filehandle through one of
2125 * these objects.
2126 * Locking: sfh_fh and sfh_tree is protected by the filesystem's
2127 * mi_fh_lock. The reference count and flags are protected by sfh_lock.
2128 * sfh_mi is read-only.
2129 *
2130 * mntinfo4_t::mi_fh_lock > sfh_lock.
2131 */
2132
2133 typedef struct nfs4_sharedfh {
2134 nfs_fh4 sfh_fh; /* key and current filehandle */
2135 kmutex_t sfh_lock;
2136 uint_t sfh_refcnt; /* reference count */
2137 uint_t sfh_flags;
2138 mntinfo4_t *sfh_mi; /* backptr to filesystem */
2139 avl_node_t sfh_tree; /* used by avl package */
2140 } nfs4_sharedfh_t;
2141
2142 #define SFH4_SAME(sfh1, sfh2) ((sfh1) == (sfh2))
2143
2144 /*
2145 * Flags.
2146 */
2147 #define SFH4_IN_TREE 0x1 /* currently in an AVL tree */
2148
2149 #ifdef _KERNEL
2150
2151 extern void sfh4_createtab(avl_tree_t *);
2152 extern nfs4_sharedfh_t *sfh4_get(const nfs_fh4 *, mntinfo4_t *);
2153 extern nfs4_sharedfh_t *sfh4_put(const nfs_fh4 *, mntinfo4_t *,
2154 nfs4_sharedfh_t *);
2155 extern void sfh4_update(nfs4_sharedfh_t *, const nfs_fh4 *);
2156 extern void sfh4_copyval(const nfs4_sharedfh_t *, nfs4_fhandle_t *);
2157 extern void sfh4_hold(nfs4_sharedfh_t *);
2158 extern void sfh4_rele(nfs4_sharedfh_t **);
2159 extern void sfh4_printfhandle(const nfs4_sharedfh_t *);
2160
2161 #endif
2162
2163 /*
2164 * Path and file name management.
2165 *
2166 * This type stores the name of an entry in the filesystem and keeps enough
2167 * information that it can provide a complete path. All fields are
2168 * protected by fn_lock, except for the reference count, which is managed
2169 * using atomic add/subtract.
2170 *
2171 * Additionally shared filehandle for this fname is stored.
2172 * Normally, fn_get() when it creates this fname stores the passed in
2173 * shared fh in fn_sfh by doing sfh_hold. Similarly the path which
2174 * destroys this fname releases the reference on this fh by doing sfh_rele.
2175 *
2176 * fn_get uses the fn_sfh to refine the comparision in cases
2177 * where we have matched the name but have differing file handles,
2178 * this normally happens due to
2179 *
2180 * 1. Server side rename of a file/directory.
2181 * 2. Another client renaming a file/directory on the server.
2182 *
2183 * Differing names but same filehandle is possible as in the case of hardlinks,
2184 * but differing filehandles with same name component will later confuse
2185 * the client and can cause various panics.
2186 *
2187 * Lock order: child and then parent.
2188 */
2189
2190 typedef struct nfs4_fname {
2191 struct nfs4_fname *fn_parent; /* parent name; null if fs root */
2192 char *fn_name; /* the actual name */
2193 ssize_t fn_len; /* strlen(fn_name) */
2194 uint32_t fn_refcnt; /* reference count */
2195 kmutex_t fn_lock;
2196 avl_node_t fn_tree;
2197 avl_tree_t fn_children; /* children, if any */
2198 nfs4_sharedfh_t *fn_sfh; /* The fh for this fname */
2199 } nfs4_fname_t;
2200
2201 #ifdef _KERNEL
2202
2203 extern vnode_t nfs4_xattr_notsupp_vnode;
2204 #define NFS4_XATTR_DIR_NOTSUPP &nfs4_xattr_notsupp_vnode
2205
2206 extern nfs4_fname_t *fn_get(nfs4_fname_t *, char *, nfs4_sharedfh_t *);
2207 extern void fn_hold(nfs4_fname_t *);
2208 extern void fn_rele(nfs4_fname_t **);
2209 extern char *fn_name(nfs4_fname_t *);
2210 extern char *fn_path(nfs4_fname_t *);
2211 extern void fn_move(nfs4_fname_t *, nfs4_fname_t *, char *);
2212 extern nfs4_fname_t *fn_parent(nfs4_fname_t *);
2213
2214 /* Referral Support */
2215 extern int nfs4_process_referral(mntinfo4_t *, nfs4_sharedfh_t *, char *,
2216 cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, struct nfs_fsl_info *);
2217
2218 #endif
2219
2220 /*
2221 * Per-zone data for managing client handles, included in this file for the
2222 * benefit of MDB.
2223 */
2224 struct nfs4_clnt {
2225 struct chhead *nfscl_chtable4;
2226 kmutex_t nfscl_chtable4_lock;
2227 zoneid_t nfscl_zoneid;
2228 list_node_t nfscl_node;
2229 struct clstat4 nfscl_stat;
2230 };
2231
2232 #ifdef __cplusplus
2233 }
2234 #endif
2235
2236 #endif /* _NFS4_CLNT_H */