1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  27 /*      All Rights Reserved   */
  28 
  29 /*
  30  * Portions of this source code were derived from Berkeley 4.3 BSD
  31  * under license from the Regents of the University of California.
  32  */
  33 
  34 #ifndef _NFS4_CLNT_H
  35 #define _NFS4_CLNT_H
  36 
  37 #include <sys/errno.h>
  38 #include <sys/types.h>
  39 #include <sys/kstat.h>
  40 #include <sys/time.h>
  41 #include <sys/flock.h>
  42 #include <vm/page.h>
  43 #include <nfs/nfs4_kprot.h>
  44 #include <nfs/nfs4.h>
  45 #include <nfs/rnode.h>
  46 #include <sys/avl.h>
  47 #include <sys/list.h>
  48 #include <rpc/auth.h>
  49 #include <sys/door.h>
  50 #include <sys/condvar_impl.h>
  51 #include <sys/zone.h>
  52 
  53 #ifdef  __cplusplus
  54 extern "C" {
  55 #endif
  56 
/* True if a file size fits in the local offset type (<= MAXOFFSET_T) */
#define NFS4_SIZE_OK(size)      ((size) <= MAXOFFSET_T)

/* Four states of nfs4_server's lease_valid */
#define NFS4_LEASE_INVALID              0
#define NFS4_LEASE_VALID                1
#define NFS4_LEASE_UNINITIALIZED        2
#define NFS4_LEASE_NOT_STARTED          3

/* flag to tell the renew thread it should exit */
#define NFS4_THREAD_EXIT        1

/* Default number of seconds to wait on GRACE and DELAY errors */
#define NFS4ERR_DELAY_TIME      10

/* Number of hash buckets for open owners for each nfs4_server */
#define NFS4_NUM_OO_BUCKETS     53

/* Number of freed open owners (per mntinfo4_t) to keep around */
#define NFS4_NUM_FREED_OPEN_OWNERS      8

/* Number of seconds to wait before retrying a SETCLIENTID(_CONFIRM) op */
#define NFS4_RETRY_SCLID_DELAY  10

/* Number of times we should retry a SETCLIENTID(_CONFIRM) op */
#define NFS4_NUM_SCLID_RETRIES  3

/* Number of times we should retry on open after getting NFS4ERR_BAD_SEQID */
#define NFS4_NUM_RETRY_BAD_SEQID        3
  85 
  86 /*
  87  * Macro to wakeup sleeping async worker threads.
  88  */
  89 #define NFS4_WAKE_ASYNC_WORKER(work_cv) {                               \
  90         if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_QUEUE]))             \
  91                 cv_signal(&work_cv[NFS4_ASYNC_QUEUE]);                      \
  92         else if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]))  \
  93                 cv_signal(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]);                \
  94 }
  95 
  96 #define NFS4_WAKEALL_ASYNC_WORKERS(work_cv) {                           \
  97                 cv_broadcast(&work_cv[NFS4_ASYNC_QUEUE]);           \
  98                 cv_broadcast(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]);             \
  99 }
 100 
/*
 * Is the attribute cache valid?  If client holds a delegation, then attrs
 * are by definition valid.  If not, then check to see if attrs have timed out.
 * r_time_attr_inval is an absolute hrtime deadline compared to gethrtime().
 */
#define ATTRCACHE4_VALID(vp) (VTOR4(vp)->r_deleg_type != OPEN_DELEGATE_NONE || \
        gethrtime() < VTOR4(vp)->r_time_attr_inval)

/*
 * Flags to indicate whether to purge the DNLC for non-directory vnodes
 * in a call to nfs_purge_caches.
 */
#define NFS4_NOPURGE_DNLC       0
#define NFS4_PURGE_DNLC         1
 114 
/*
 * Is cache valid?
 * Swap is always valid, if no attributes (attrtime == 0) or
 * if mtime matches cached mtime it is valid
 * NOTE: mtime is now a timestruc_t.
 * A file size mismatch also invalidates the cache.
 * Caller should be holding the rnode r_statelock mutex.
 */
#define CACHE4_VALID(rp, mtime, fsize)                          \
        ((RTOV4(rp)->v_flag & VISSWAP) == VISSWAP ||             \
        (((mtime).tv_sec == (rp)->r_attr.va_mtime.tv_sec &&  \
        (mtime).tv_nsec == (rp)->r_attr.va_mtime.tv_nsec) && \
        ((fsize) == (rp)->r_attr.va_size)))
 127 
/*
 * Macro to detect forced unmount or a zone shutdown.
 */
#define FS_OR_ZONE_GONE4(vfsp) \
        (((vfsp)->vfs_flag & VFS_UNMOUNTED) || \
        zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)

/*
 * Macro to help determine whether a request failed because the underlying
 * filesystem has been forcibly unmounted or because of zone shutdown.
 * Only an EIO error is considered a candidate for this interpretation.
 */
#define NFS4_FRC_UNMT_ERR(err, vfsp) \
        ((err) == EIO && FS_OR_ZONE_GONE4((vfsp)))
 141 
/*
 * Due to the way the address space callbacks are used to execute a delmap,
 * we must keep track of how many times the same thread has called
 * VOP_DELMAP()->nfs4_delmap().  This is done by having a list of
 * nfs4_delmapcall_t's associated with each rnode4_t.  This list is protected
 * by the rnode4_t's r_statelock.  The individual elements do not need to be
 * protected as they will only ever be created, modified and destroyed by
 * one thread (the call_id).
 * See nfs4_delmap() for further explanation.
 */
typedef struct nfs4_delmapcall {
        kthread_t       *call_id;       /* thread that owns this entry */
        int             error;  /* error from delmap */
        list_node_t     call_node;      /* linkage on the rnode4_t's list */
} nfs4_delmapcall_t;
 157 
/*
 * delmap address space callback args; carries the arguments of the
 * originating VOP_DELMAP() call to the async callback.
 */
typedef struct nfs4_delmap_args {
        vnode_t                 *vp;    /* vnode being unmapped */
        offset_t                off;    /* offset of the mapping */
        caddr_t                 addr;   /* address of the mapping */
        size_t                  len;    /* length of the mapping */
        uint_t                  prot;   /* protection bits */
        uint_t                  maxprot;        /* max protection bits */
        uint_t                  flags;  /* mapping flags */
        cred_t                  *cr;    /* caller's credentials */
        nfs4_delmapcall_t       *caller; /* to retrieve errors from the cb */
} nfs4_delmap_args_t;
 172 
/*
 * client side statistics
 */
/*
 * Per-zone counters, exported as named kstats.
 */
struct clstat4 {
        kstat_named_t   calls;                  /* client requests */
        kstat_named_t   badcalls;               /* rpc failures */
        kstat_named_t   referrals;              /* referrals */
        kstat_named_t   referlinks;             /* referrals as symlinks */
        kstat_named_t   clgets;                 /* client handle gets */
        kstat_named_t   cltoomany;              /* client handle cache misses */
#ifdef DEBUG
        kstat_named_t   clalloc;                /* number of client handles */
        kstat_named_t   noresponse;             /* server not responding cnt */
        kstat_named_t   failover;               /* server failover count */
        kstat_named_t   remap;                  /* server remap count */
#endif
};
 193 
#ifdef DEBUG
/*
 * The following are statistics that describe the behavior of the system as a
 * whole and don't correspond to any particular zone.
 * Compiled in on DEBUG kernels only.
 */
struct clstat4_debug {
        kstat_named_t   nrnode;                 /* number of allocated rnodes */
        kstat_named_t   access;                 /* size of access cache */
        kstat_named_t   dirent;                 /* size of readdir cache */
        kstat_named_t   dirents;                /* size of readdir buf cache */
        kstat_named_t   reclaim;                /* number of reclaims */
        kstat_named_t   clreclaim;              /* number of cl reclaims */
        kstat_named_t   f_reclaim;              /* number of free reclaims */
        kstat_named_t   a_reclaim;              /* number of active reclaims */
        kstat_named_t   r_reclaim;              /* number of rnode reclaims */
        kstat_named_t   rpath;                  /* bytes used to store rpaths */
};
extern struct clstat4_debug clstat4_debug;

#endif
 214 
/*
 * The NFS specific async_reqs structure. iotype4 is grouped to support two
 * types of async thread pools, please read comments section of mntinfo4_t
 * definition for more information. Care should be taken while adding new
 * members to this group.
 */

enum iotype4 {
        /* page-op types; must remain first (see NFS4_ASYNC_PGOPS_TYPES) */
        NFS4_PUTAPAGE,
        NFS4_PAGEIO,
        NFS4_COMMIT,
        /* remaining async request types */
        NFS4_READ_AHEAD,
        NFS4_READDIR,
        NFS4_INACTIVE,
        NFS4_ASYNC_TYPES        /* number of iotype4 values */
};
/* Number of page-op request types (NFS4_PUTAPAGE through NFS4_COMMIT) */
#define NFS4_ASYNC_PGOPS_TYPES  (NFS4_COMMIT + 1)
 232 
/*
 * NFS async requests queue type.
 */
enum ioqtype4 {
        NFS4_ASYNC_QUEUE,       /* general-purpose async request queue */
        NFS4_ASYNC_PGOPS_QUEUE, /* queue for page-op requests */
        NFS4_MAX_ASYNC_QUEUES   /* number of queues */
};

/*
 * Number of NFS async threads operating exclusively on page op requests.
 */
#define NUM_ASYNC_PGOPS_THREADS 0x2
 246 
/* Arguments for an async NFS4_READ_AHEAD request */
struct nfs4_async_read_req {
        void (*readahead)();            /* pointer to readahead function */
        u_offset_t blkoff;              /* offset in file */
        struct seg *seg;                /* segment to do i/o to */
        caddr_t addr;                   /* address to do i/o to */
};
 253 
/* Arguments for an async NFS4_PUTAPAGE or NFS4_PAGEIO request */
struct nfs4_pageio_req {
        int (*pageio)();                /* pointer to pageio function */
        page_t *pp;                     /* page list */
        u_offset_t io_off;              /* offset in file */
        uint_t io_len;                  /* size of request */
        int flags;
};
 261 
/* Arguments for an async NFS4_READDIR request */
struct nfs4_readdir_req {
        int (*readdir)();               /* pointer to readdir function */
        struct rddir4_cache *rdc;       /* pointer to cache entry to fill */
};
 266 
/* Arguments for an async NFS4_COMMIT request */
struct nfs4_commit_req {
        void (*commit)();               /* pointer to commit function */
        page_t *plist;                  /* page list */
        offset4 offset;                 /* starting offset */
        count4 count;                   /* size of range to be committed */
};
 273 
/*
 * A single queued async request; a_io selects which member of the
 * a_args union is valid.
 */
struct nfs4_async_reqs {
        struct nfs4_async_reqs *a_next; /* pointer to next arg struct */
#ifdef DEBUG
        kthread_t *a_queuer;            /* thread id of queueing thread */
#endif
        struct vnode *a_vp;             /* vnode pointer */
        struct cred *a_cred;            /* cred pointer */
        enum iotype4 a_io;              /* i/o type */
        union {
                struct nfs4_async_read_req a_read_args;
                struct nfs4_pageio_req a_pageio_args;
                struct nfs4_readdir_req a_readdir_args;
                struct nfs4_commit_req a_commit_args;
        } a_args;
};
 289 
/*
 * Shorthand accessors for the a_args union members of
 * struct nfs4_async_reqs.
 */
#define a_nfs4_readahead a_args.a_read_args.readahead
#define a_nfs4_blkoff a_args.a_read_args.blkoff
#define a_nfs4_seg a_args.a_read_args.seg
#define a_nfs4_addr a_args.a_read_args.addr

#define a_nfs4_putapage a_args.a_pageio_args.pageio
#define a_nfs4_pageio a_args.a_pageio_args.pageio
#define a_nfs4_pp a_args.a_pageio_args.pp
#define a_nfs4_off a_args.a_pageio_args.io_off
#define a_nfs4_len a_args.a_pageio_args.io_len
#define a_nfs4_flags a_args.a_pageio_args.flags

#define a_nfs4_readdir a_args.a_readdir_args.readdir
#define a_nfs4_rdc a_args.a_readdir_args.rdc

#define a_nfs4_commit a_args.a_commit_args.commit
#define a_nfs4_plist a_args.a_commit_args.plist
#define a_nfs4_offset a_args.a_commit_args.offset
#define a_nfs4_count a_args.a_commit_args.count
 309 
/*
 * Security information
 */
typedef struct sv_secinfo {
        uint_t          count;  /* how many sdata there are */
        uint_t          index;  /* which sdata[index] */
        struct sec_data *sdata; /* array of security data entries */
} sv_secinfo_t;
 318 
/*
 * Hash bucket for the mi's open owner list (mi_oo_list).
 */
typedef struct nfs4_oo_hash_bucket {
        list_t                  b_oo_hash_list; /* open owners in this bucket */
        kmutex_t                b_lock;         /* protects b_oo_hash_list */
} nfs4_oo_hash_bucket_t;
 326 
/*
 * Global array of ctags.
 */
extern ctag_t nfs4_ctags[];

/*
 * Tags identifying the operation behind an over-the-wire compound.
 * Each value has a matching row in NFS4_TAG_INITIALIZER below; keep
 * the two in sync (same order, one row per tag).
 */
typedef enum nfs4_tag_type {
        TAG_NONE,
        TAG_ACCESS,
        TAG_CLOSE,
        TAG_CLOSE_LOST,
        TAG_CLOSE_UNDO,
        TAG_COMMIT,
        TAG_DELEGRETURN,
        TAG_FSINFO,
        TAG_GET_SYMLINK,
        TAG_GETATTR,
        TAG_GETATTR_FSLOCATION,
        TAG_INACTIVE,
        TAG_LINK,
        TAG_LOCK,
        TAG_LOCK_RECLAIM,
        TAG_LOCK_RESEND,
        TAG_LOCK_REINSTATE,
        TAG_LOCK_UNKNOWN,
        TAG_LOCKT,
        TAG_LOCKU,
        TAG_LOCKU_RESEND,
        TAG_LOCKU_REINSTATE,
        TAG_LOOKUP,
        TAG_LOOKUP_PARENT,
        TAG_LOOKUP_VALID,
        TAG_LOOKUP_VPARENT,
        TAG_MKDIR,
        TAG_MKNOD,
        TAG_MOUNT,
        TAG_OPEN,
        TAG_OPEN_CONFIRM,
        TAG_OPEN_CONFIRM_LOST,
        TAG_OPEN_DG,
        TAG_OPEN_DG_LOST,
        TAG_OPEN_LOST,
        TAG_OPENATTR,
        TAG_PATHCONF,
        TAG_PUTROOTFH,
        TAG_READ,
        TAG_READAHEAD,
        TAG_READDIR,
        TAG_READLINK,
        TAG_RELOCK,
        TAG_REMAP_LOOKUP,
        TAG_REMAP_LOOKUP_AD,
        TAG_REMAP_LOOKUP_NA,
        TAG_REMAP_MOUNT,
        TAG_RMDIR,
        TAG_REMOVE,
        TAG_RENAME,
        TAG_RENAME_VFH,
        TAG_RENEW,
        TAG_REOPEN,
        TAG_REOPEN_LOST,
        TAG_SECINFO,
        TAG_SETATTR,
        TAG_SETCLIENTID,
        TAG_SETCLIENTID_CF,
        TAG_SYMLINK,
        TAG_WRITE
} nfs4_tag_type_t;
 394 
/*
 * Static initializer paired with nfs4_tag_type_t: each row is
 * {tag, human-readable label, the label's ASCII bytes packed into three
 * 32-bit words, padded with spaces (0x20)}.  Rows must stay in the same
 * order as the enum above.
 */
#define NFS4_TAG_INITIALIZER    {                               \
                {TAG_NONE,              "",                     \
                        {0x20202020, 0x20202020, 0x20202020}},  \
                {TAG_ACCESS,            "access",               \
                        {0x61636365, 0x73732020, 0x20202020}},  \
                {TAG_CLOSE,             "close",                \
                        {0x636c6f73, 0x65202020, 0x20202020}},  \
                {TAG_CLOSE_LOST,        "lost close",           \
                        {0x6c6f7374, 0x20636c6f, 0x73652020}},  \
                {TAG_CLOSE_UNDO,        "undo close",           \
                        {0x756e646f, 0x20636c6f, 0x73652020}},  \
                {TAG_COMMIT,            "commit",               \
                        {0x636f6d6d, 0x69742020, 0x20202020}},  \
                {TAG_DELEGRETURN,       "delegreturn",          \
                        {0x64656c65, 0x67726574, 0x75726e20}},  \
                {TAG_FSINFO,            "fsinfo",               \
                        {0x6673696e, 0x666f2020, 0x20202020}},  \
                {TAG_GET_SYMLINK,       "get symlink text",     \
                        {0x67657420, 0x736c6e6b, 0x20747874}},  \
                {TAG_GETATTR,           "getattr",              \
                        {0x67657461, 0x74747220, 0x20202020}},  \
                {TAG_GETATTR_FSLOCATION, "getattr fslocation",  \
                        {0x67657461, 0x74747220, 0x66736c6f}},  \
                {TAG_INACTIVE,          "inactive",             \
                        {0x696e6163, 0x74697665, 0x20202020}},  \
                {TAG_LINK,              "link",                 \
                        {0x6c696e6b, 0x20202020, 0x20202020}},  \
                {TAG_LOCK,              "lock",                 \
                        {0x6c6f636b, 0x20202020, 0x20202020}},  \
                {TAG_LOCK_RECLAIM,      "reclaim lock",         \
                        {0x7265636c, 0x61696d20, 0x6c6f636b}},  \
                {TAG_LOCK_RESEND,       "resend lock",          \
                        {0x72657365, 0x6e64206c, 0x6f636b20}},  \
                {TAG_LOCK_REINSTATE,    "reinstate lock",       \
                        {0x7265696e, 0x7374206c, 0x6f636b20}},  \
                {TAG_LOCK_UNKNOWN,      "unknown lock",         \
                        {0x756e6b6e, 0x6f776e20, 0x6c6f636b}},  \
                {TAG_LOCKT,             "lock test",            \
                        {0x6c6f636b, 0x5f746573, 0x74202020}},  \
                {TAG_LOCKU,             "unlock",               \
                        {0x756e6c6f, 0x636b2020, 0x20202020}},  \
                {TAG_LOCKU_RESEND,      "resend locku",         \
                        {0x72657365, 0x6e64206c, 0x6f636b75}},  \
                {TAG_LOCKU_REINSTATE,   "reinstate unlock",     \
                        {0x7265696e, 0x73742075, 0x6e6c636b}},  \
                {TAG_LOOKUP,            "lookup",               \
                        {0x6c6f6f6b, 0x75702020, 0x20202020}},  \
                {TAG_LOOKUP_PARENT,     "lookup parent",        \
                        {0x6c6f6f6b, 0x75702070, 0x6172656e}},  \
                {TAG_LOOKUP_VALID,      "lookup valid",         \
                        {0x6c6f6f6b, 0x75702076, 0x616c6964}},  \
                {TAG_LOOKUP_VPARENT,    "lookup valid parent",  \
                        {0x6c6f6f6b, 0x766c6420, 0x7061726e}},  \
                {TAG_MKDIR,             "mkdir",                \
                        {0x6d6b6469, 0x72202020, 0x20202020}},  \
                {TAG_MKNOD,             "mknod",                \
                        {0x6d6b6e6f, 0x64202020, 0x20202020}},  \
                {TAG_MOUNT,             "mount",                \
                        {0x6d6f756e, 0x74202020, 0x20202020}},  \
                {TAG_OPEN,              "open",                 \
                        {0x6f70656e, 0x20202020, 0x20202020}},  \
                {TAG_OPEN_CONFIRM,      "open confirm",         \
                        {0x6f70656e, 0x5f636f6e, 0x6669726d}},  \
                {TAG_OPEN_CONFIRM_LOST, "lost open confirm",    \
                        {0x6c6f7374, 0x206f7065, 0x6e5f636f}},  \
                {TAG_OPEN_DG,           "open downgrade",       \
                        {0x6f70656e, 0x20646772, 0x61646520}},  \
                {TAG_OPEN_DG_LOST,      "lost open downgrade",  \
                        {0x6c737420, 0x6f70656e, 0x20646772}},  \
                {TAG_OPEN_LOST,         "lost open",            \
                        {0x6c6f7374, 0x206f7065, 0x6e202020}},  \
                {TAG_OPENATTR,          "openattr",             \
                        {0x6f70656e, 0x61747472, 0x20202020}},  \
                {TAG_PATHCONF,          "pathconf",             \
                        {0x70617468, 0x636f6e66, 0x20202020}},  \
                {TAG_PUTROOTFH,         "putrootfh",            \
                        {0x70757472, 0x6f6f7466, 0x68202020}},  \
                {TAG_READ,              "read",                 \
                        {0x72656164, 0x20202020, 0x20202020}},  \
                {TAG_READAHEAD,         "readahead",            \
                        {0x72656164, 0x61686561, 0x64202020}},  \
                {TAG_READDIR,           "readdir",              \
                        {0x72656164, 0x64697220, 0x20202020}},  \
                {TAG_READLINK,          "readlink",             \
                        {0x72656164, 0x6c696e6b, 0x20202020}},  \
                {TAG_RELOCK,            "relock",               \
                        {0x72656c6f, 0x636b2020, 0x20202020}},  \
                {TAG_REMAP_LOOKUP,      "remap lookup",         \
                        {0x72656d61, 0x70206c6f, 0x6f6b7570}},  \
                {TAG_REMAP_LOOKUP_AD,   "remap lookup attr dir",        \
                        {0x72656d70, 0x206c6b75, 0x70206164}},  \
                {TAG_REMAP_LOOKUP_NA,   "remap lookup named attrs",     \
                        {0x72656d70, 0x206c6b75, 0x70206e61}},  \
                {TAG_REMAP_MOUNT,       "remap mount",          \
                        {0x72656d61, 0x70206d6f, 0x756e7420}},  \
                {TAG_RMDIR,             "rmdir",                \
                        {0x726d6469, 0x72202020, 0x20202020}},  \
                {TAG_REMOVE,            "remove",               \
                        {0x72656d6f, 0x76652020, 0x20202020}},  \
                {TAG_RENAME,            "rename",               \
                        {0x72656e61, 0x6d652020, 0x20202020}},  \
                {TAG_RENAME_VFH,        "rename volatile fh",   \
                        {0x72656e61, 0x6d652028, 0x76666829}},  \
                {TAG_RENEW,             "renew",                \
                        {0x72656e65, 0x77202020, 0x20202020}},  \
                {TAG_REOPEN,            "reopen",               \
                        {0x72656f70, 0x656e2020, 0x20202020}},  \
                {TAG_REOPEN_LOST,       "lost reopen",          \
                        {0x6c6f7374, 0x2072656f, 0x70656e20}},  \
                {TAG_SECINFO,           "secinfo",              \
                        {0x73656369, 0x6e666f20, 0x20202020}},  \
                {TAG_SETATTR,           "setattr",              \
                        {0x73657461, 0x74747220, 0x20202020}},  \
                {TAG_SETCLIENTID,       "setclientid",          \
                        {0x73657463, 0x6c69656e, 0x74696420}},  \
                {TAG_SETCLIENTID_CF,    "setclientid_confirm",  \
                        {0x73636c6e, 0x7469645f, 0x636f6e66}},  \
                {TAG_SYMLINK,           "symlink",              \
                        {0x73796d6c, 0x696e6b20, 0x20202020}},  \
                {TAG_WRITE,             "write",                \
                        {0x77726974, 0x65202020, 0x20202020}}   \
        }
 517 
/*
 * These flags are for differentiating the search criteria for
 * find_open_owner().  The comparison is done with the open_owners's
 * 'oo_just_created' flag.
 */
#define NFS4_PERM_CREATED       0x0
#define NFS4_JUST_CREATED       0x1
 525 
/*
 * Hashed by the cr_uid and cr_ruid of credential 'oo_cred'. 'oo_cred_otw'
 * is stored upon a successful OPEN.  This is needed when the user's effective
 * and real uid's don't match.  The 'oo_cred_otw' overrides the credential
 * passed down by VFS for async read/write, commit, lock, and close operations.
 *
 * The oo_ref_count keeps track of the number of active references on this
 * data structure + number of nfs4_open_streams point to this structure.
 *
 * 'oo_valid' tells whether this struct is about to be freed or not.
 *
 * 'oo_just_created' tells us whether this struct has just been created but
 * not been fully finalized (that is created upon an OPEN request and
 * finalized upon the OPEN success).
 *
 * The 'oo_seqid_inuse' is for the open seqid synchronization.  If a thread
 * is currently using the open owner and its open_seqid, then it sets the
 * oo_seqid_inuse to true if it currently is not set.  If it is set then it
 * does a cv_wait on the oo_cv_seqid_sync condition variable.  When the thread
 * is done it unsets the oo_seqid_inuse and does a cv_signal to wake a process
 * waiting on the condition variable.
 *
 * 'oo_last_good_seqid' is the last valid seqid this open owner sent OTW,
 * and 'oo_last_good_op' is the operation that issued the last valid seqid.
 *
 * Lock ordering:
 *      mntinfo4_t::mi_lock > oo_lock (for searching mi_oo_list)
 *
 *      oo_seqid_inuse > mntinfo4_t::mi_lock
 *      oo_seqid_inuse > rnode4_t::r_statelock
 *      oo_seqid_inuse > rnode4_t::r_statev4_lock
 *      oo_seqid_inuse > nfs4_open_stream_t::os_sync_lock
 *
 * The 'oo_seqid_inuse'/'oo_cv_seqid_sync' protects:
 *      oo_last_good_op
 *      oo_last_good_seqid
 *      oo_name
 *      oo_seqid
 *
 * The 'oo_lock' protects:
 *      oo_cred
 *      oo_cred_otw
 *      oo_foo_node
 *      oo_hash_node
 *      oo_just_created
 *      oo_ref_count
 *      oo_valid
 */

typedef struct nfs4_open_owner {
        cred_t                  *oo_cred;
        int                     oo_ref_count;
        int                     oo_valid;
        int                     oo_just_created;
        seqid4                  oo_seqid;
        seqid4                  oo_last_good_seqid;
        nfs4_tag_type_t         oo_last_good_op;
        unsigned                oo_seqid_inuse:1;
        cred_t                  *oo_cred_otw;
        kcondvar_t              oo_cv_seqid_sync;
        /*
         * Fix this to always be 8 bytes
         */
        uint64_t                oo_name;
        list_node_t             oo_hash_node;
        list_node_t             oo_foo_node;
        kmutex_t                oo_lock;
} nfs4_open_owner_t;
 594 
/*
 * Static server information.
 * These fields are read-only once they are initialized; sv_lock
 * should be held as writer if they are changed during mount:
 *      sv_addr
 *      sv_dhsec
 *      sv_hostname
 *      sv_hostnamelen
 *      sv_knconf
 *      sv_next
 *      sv_origknconf
 *
 * These fields are protected by sv_lock:
 *      sv_currsec
 *      sv_fhandle
 *      sv_flags
 *      sv_fsid
 *      sv_path
 *      sv_pathlen
 *      sv_pfhandle
 *      sv_save_secinfo
 *      sv_savesec
 *      sv_secdata
 *      sv_secinfo
 *      sv_supp_attrs
 *
 * Lock ordering:
 * nfs_rtable4_lock > sv_lock
 * rnode4_t::r_statelock > sv_lock
 */
typedef struct servinfo4 {
        struct knetconfig *sv_knconf;   /* bound TLI fd */
        struct knetconfig *sv_origknconf;       /* For RDMA save orig knconf */
        struct netbuf      sv_addr;     /* server's address */
        nfs4_fhandle_t     sv_fhandle;  /* this server's filehandle */
        nfs4_fhandle_t     sv_pfhandle; /* parent dir filehandle */
        int                sv_pathlen;  /* Length of server path */
        char              *sv_path;     /* Path name on server */
        uint32_t           sv_flags;    /* flags for this server (SV4_*) */
        sec_data_t        *sv_secdata;  /* client initiated security data */
        sv_secinfo_t      *sv_secinfo;  /* server security information */
        sec_data_t        *sv_currsec;  /* security data currently used; */
                                        /* points to one of the sec_data */
                                        /* entries in sv_secinfo */
        sv_secinfo_t      *sv_save_secinfo; /* saved secinfo */
        sec_data_t        *sv_savesec;  /* saved security data */
        sec_data_t        *sv_dhsec;    /* AUTH_DH data from the user land */
        char              *sv_hostname; /* server's hostname */
        int                sv_hostnamelen;  /* server's hostname length */
        fattr4_fsid             sv_fsid;    /* fsid of shared obj       */
        fattr4_supported_attrs  sv_supp_attrs;
        struct servinfo4  *sv_next;     /* next in list */
        nfs_rwlock_t       sv_lock;
} servinfo4_t;
 649 
/* sv_flags fields */
#define SV4_TRYSECINFO          0x001   /* try secinfo data from the server */
#define SV4_TRYSECDEFAULT       0x002   /* try a default flavor */
#define SV4_NOTINUSE            0x004   /* servinfo4_t had fatal errors */
#define SV4_ROOT_STALE          0x008   /* root vnode got ESTALE */

/*
 * Lock call types.  See nfs4frlock().
 */
typedef enum nfs4_lock_call_type {
        NFS4_LCK_CTYPE_NORM,            /* ordinary lock request */
        NFS4_LCK_CTYPE_RECLAIM,         /* reclaiming a lock */
        NFS4_LCK_CTYPE_RESEND,          /* resending a lost lock request */
        NFS4_LCK_CTYPE_REINSTATE        /* reinstating a lock */
} nfs4_lock_call_type_t;
 665 
/*
 * This structure holds the information for a lost open/close/open downgrade/
 * lock/locku request.  It is also used for requests that are queued up so
 * that the recovery thread can release server state after a forced
 * unmount.
 * "lr_op" is 0 if the struct is uninitialized.  Otherwise, it is set to
 * the proper OP_* nfs_opnum4 number.  The other fields contain information
 * to reconstruct the call.
 *
 * lr_dvp is used for OPENs with CREATE, so that we can do a PUTFH of the
 * parent directory without relying on vtodv (since we may not have a vp
 * for the file we wish to create).
 *
 * lr_putfirst means that the request should go to the front of the resend
 * queue, rather than the end.
 */
typedef struct nfs4_lost_rqst {
        list_node_t                     lr_node;
        nfs_opnum4                      lr_op;
        vnode_t                         *lr_vp;
        vnode_t                         *lr_dvp;
        nfs4_open_owner_t               *lr_oop;
        struct nfs4_open_stream         *lr_osp;
        struct nfs4_lock_owner          *lr_lop;
        cred_t                          *lr_cr;
        flock64_t                       *lr_flk;
        bool_t                          lr_putfirst;
        union {
                struct {
                        nfs4_lock_call_type_t lru_ctype;
                        nfs_lock_type4  lru_locktype;
                } lru_lockargs;         /* LOCK, LOCKU */
                struct {
                        uint32_t                lru_oaccess;
                        uint32_t                lru_odeny;
                        enum open_claim_type4   lru_oclaim;
                        stateid4                lru_ostateid; /* reopen only */
                        component4              lru_ofile;
                } lru_open_args;
                struct {
                        uint32_t        lru_dg_access;
                        uint32_t        lru_dg_deny;
                } lru_open_dg_args;
        } nfs4_lr_u;
} nfs4_lost_rqst_t;

/* Shorthand accessors for the nfs4_lr_u union members */
#define lr_oacc         nfs4_lr_u.lru_open_args.lru_oaccess
#define lr_odeny        nfs4_lr_u.lru_open_args.lru_odeny
#define lr_oclaim       nfs4_lr_u.lru_open_args.lru_oclaim
#define lr_ostateid     nfs4_lr_u.lru_open_args.lru_ostateid
#define lr_ofile        nfs4_lr_u.lru_open_args.lru_ofile
#define lr_dg_acc       nfs4_lr_u.lru_open_dg_args.lru_dg_access
#define lr_dg_deny      nfs4_lr_u.lru_open_dg_args.lru_dg_deny
#define lr_ctype        nfs4_lr_u.lru_lockargs.lru_ctype
#define lr_locktype     nfs4_lr_u.lru_lockargs.lru_locktype
 721 
 722 /*
 723  * Recovery actions.  Some actions can imply further recovery using a
 724  * different recovery action (e.g., recovering the clientid leads to
 725  * recovering open files and locks).
 726  */
 727 
typedef enum {
        NR_UNUSED,              /* no recovery action set */
        NR_CLIENTID,            /* establish a new clientid */
        NR_OPENFILES,           /* reopen open files (implies lock recovery) */
        NR_FHEXPIRED,           /* filehandle expired; re-obtain it */
        NR_FAILOVER,            /* fail over to another server */
        NR_WRONGSEC,            /* renegotiate security flavor */
        NR_EXPIRED,             /* lease expired */
        NR_BAD_STATEID,         /* server rejected a stateid */
        NR_BADHANDLE,           /* server returned a bad-handle error */
        NR_BAD_SEQID,           /* bad sequence id */
        NR_OLDSTATEID,          /* stateid is out of date */
        NR_GRACE,               /* server in grace period */
        NR_DELAY,               /* server asked client to retry later */
        NR_LOST_LOCK,           /* resend a lost lock request */
        NR_LOST_STATE_RQST,     /* resend a lost state-modifying request */
        NR_STALE,               /* stale filehandle */
        NR_MOVED                /* filesystem moved (referral/migration) */
} nfs4_recov_t;
 747 
 748 /*
 749  * Administrative and debug message framework.
 750  */
 751 
/* Default upper bound on queued debug messages per mount (see mi_msg_list). */
#define NFS4_MSG_MAX    100
extern int nfs4_msg_max;        /* tunable message cap (see NFS4_MSG_MAX) */

/* Max referral redirects followed (cf. mntinfo4::mi_vfs_referral_loop_cnt). */
#define NFS4_REFERRAL_LOOP_MAX  20
 756 
/*
 * Event types recorded on the per-mount debug message queue
 * (see nfs4_revent_t / nfs4_debug_msg_t).
 */
typedef enum {
        RE_BAD_SEQID,           /* bad sequence id seen */
        RE_BADHANDLE,           /* bad filehandle error */
        RE_CLIENTID,            /* clientid recovery event */
        RE_DEAD_FILE,           /* file cannot be recovered */
        RE_END,                 /* recovery finished */
        RE_FAIL_RELOCK,         /* failed to reacquire a lock */
        RE_FAIL_REMAP_LEN,      /* remap failure: reply length */
        RE_FAIL_REMAP_OP,       /* remap failure: unexpected op */
        RE_FAILOVER,            /* failover to another server */
        RE_FILE_DIFF,           /* file differs after recovery/remap */
        RE_LOST_STATE,          /* lost state request queued */
        RE_OPENS_CHANGED,       /* open count changed during recovery */
        RE_SIGLOST,             /* lock lost; process signalled */
        RE_SIGLOST_NO_DUMP,     /* as RE_SIGLOST, without dump */
        RE_START,               /* recovery started */
        RE_UNEXPECTED_ACTION,   /* unknown recovery action */
        RE_UNEXPECTED_ERRNO,    /* unknown errno */
        RE_UNEXPECTED_STATUS,   /* unknown NFSv4 status code */
        RE_WRONGSEC,            /* security flavor renegotiation */
        RE_LOST_STATE_BAD_OP,   /* bad op in a lost-state request */
        RE_REFERRAL             /* referral event */
} nfs4_event_type_t;
 780 
/* Whether a recorded fact still warrants inspection (nfs4_rfact_t.rf_status). */
typedef enum {
        RFS_NO_INSPECT,         /* fact needs no further inspection */
        RFS_INSPECT             /* fact should be inspected */
} nfs4_fact_status_t;

/* Kinds of "facts" recorded by the message framework (nfs4_rfact_t.rf_type). */
typedef enum {
        RF_BADOWNER,            /* bad owner/group mapping */
        RF_ERR,                 /* generic error fact */
        RF_RENEW_EXPIRED,       /* lease expired before renewal completed */
        RF_SRV_NOT_RESPOND,     /* server stopped responding */
        RF_SRV_OK,              /* server responding again */
        RF_SRVS_NOT_RESPOND,    /* all servers stopped responding */
        RF_SRVS_OK,             /* servers responding again */
        RF_DELMAP_CB_ERR,       /* error from delmap callback */
        RF_SENDQ_FULL           /* transport send queue full */
} nfs4_fact_type_t;

/* Whether a queued debug message may be dumped (nfs4_debug_msg_t.msg_status). */
typedef enum {
        NFS4_MS_DUMP,           /* message may be dumped */
        NFS4_MS_NO_DUMP         /* message suppressed */
} nfs4_msg_status_t;
 802 
/*
 * A recovery "fact": a condition observed by the client, recorded on the
 * per-mount debug message queue (mi_msg_list) for administrators/debugging.
 */
typedef struct nfs4_rfact {
        nfs4_fact_type_t        rf_type;        /* what was observed */
        nfs4_fact_status_t      rf_status;      /* inspection status */
        bool_t                  rf_reboot;      /* server reboot involved? */
        nfs4_recov_t            rf_action;      /* associated recovery action */
        nfs_opnum4              rf_op;          /* OTW operation involved */
        nfsstat4                rf_stat4;       /* NFSv4 status involved */
        timespec_t              rf_time;        /* when the fact was recorded */
        int                     rf_error;       /* errno value, if any */
        struct rnode4           *rf_rp1;        /* rnode involved, if any */
        char                    *rf_char1;      /* string detail, if any */
} nfs4_rfact_t;
 815 
/*
 * A recovery event record (see nfs4_event_type_t), queued on mi_msg_list.
 * Which of the optional fields are valid depends on re_type.
 */
typedef struct nfs4_revent {
        nfs4_event_type_t       re_type;        /* which event */
        nfsstat4                re_stat4;       /* NFSv4 status, if relevant */
        uint_t                  re_uint;        /* event-specific number */
        pid_t                   re_pid;         /* process involved, if any */
        struct mntinfo4         *re_mi;         /* mount involved */
        struct rnode4           *re_rp1;        /* first rnode, if any */
        struct rnode4           *re_rp2;        /* second rnode, if any */
        char                    *re_char1;      /* first string detail */
        char                    *re_char2;      /* second string detail */
        nfs4_tag_type_t         re_tag1;        /* first compound tag */
        nfs4_tag_type_t         re_tag2;        /* second compound tag */
        seqid4                  re_seqid1;      /* first seqid, if any */
        seqid4                  re_seqid2;      /* second seqid, if any */
} nfs4_revent_t;
 831 
/* Discriminator for the rmsg_u union below. */
typedef enum {
        RM_EVENT,               /* rmsg_u.msg_event is valid */
        RM_FACT                 /* rmsg_u.msg_fact is valid */
} nfs4_msg_type_t;

/* One entry on the per-mount debug message queue (mntinfo4::mi_msg_list). */
typedef struct nfs4_debug_msg {
        timespec_t              msg_time;       /* when the message was queued */
        nfs4_msg_type_t         msg_type;       /* RM_EVENT or RM_FACT */
        char                    *msg_srv;       /* server name */
        char                    *msg_mntpt;     /* mount point */
        union {
                nfs4_rfact_t    msg_fact;       /* valid if RM_FACT */
                nfs4_revent_t   msg_event;      /* valid if RM_EVENT */
        } rmsg_u;
        nfs4_msg_status_t       msg_status;     /* dump / no-dump */
        list_node_t             msg_node;       /* linkage on mi_msg_list */
} nfs4_debug_msg_t;
 849 
 850 /*
 851  * NFS private data per mounted file system
 852  *      The mi_lock mutex protects the following fields:
 853  *              mi_flags
 854  *              mi_in_recovery
 855  *              mi_recovflags
 856  *              mi_recovthread
 857  *              mi_error
 858  *              mi_printed
 859  *              mi_down
 860  *              mi_stsize
 861  *              mi_curread
 862  *              mi_curwrite
 863  *              mi_timers
 864  *              mi_curr_serv
 865  *              mi_klmconfig
 866  *              mi_oo_list
 867  *              mi_foo_list
 868  *              mi_foo_num
 869  *              mi_foo_max
 870  *              mi_lost_state
 871  *              mi_bseqid_list
 872  *              mi_ephemeral
 873  *              mi_ephemeral_tree
 874  *
 875  *      Normally the netconfig information for the mount comes from
 876  *      mi_curr_serv and mi_klmconfig is NULL.  If NLM calls need to use a
 877  *      different transport, mi_klmconfig contains the necessary netconfig
 878  *      information.
 879  *
 880  *      The mi_async_lock mutex protects the following fields:
 881  *              mi_async_reqs
 882  *              mi_async_req_count
 883  *              mi_async_tail
 884  *              mi_async_curr[NFS4_MAX_ASYNC_QUEUES]
 885  *              mi_async_clusters
 886  *              mi_async_init_clusters
 887  *              mi_threads[NFS4_MAX_ASYNC_QUEUES]
 888  *              mi_inactive_thread
 889  *              mi_manager_thread
 890  *
 891  *      The nfs4_server_t::s_lock protects the following fields:
 892  *              mi_clientid
 893  *              mi_clientid_next
 894  *              mi_clientid_prev
 895  *              mi_open_files
 896  *
 897  *      The mntinfo4_t::mi_recovlock protects the following fields:
 898  *              mi_srvsettime
 899  *              mi_srvset_cnt
 900  *              mi_srv
 901  *
 902  * Changing mi_srv from one nfs4_server_t to a different one requires
 903  * holding the mi_recovlock as RW_WRITER.
 904  * Exception: setting mi_srv the first time in mount/mountroot is done
 905  * holding the mi_recovlock as RW_READER.
 906  *
 907  *      Locking order:
 908  *        mi4_globals::mig_lock > mi_async_lock
 909  *        mi_async_lock > nfs4_server_t::s_lock > mi_lock
 910  *        mi_recovlock > mi_rename_lock > nfs_rtable4_lock
 911  *        nfs4_server_t::s_recovlock > mi_recovlock
 912  *        rnode4_t::r_rwlock > mi_rename_lock
 913  *        nfs_rtable4_lock > mi_lock
 914  *        nfs4_server_t::s_lock > mi_msg_list_lock
 915  *        mi_recovlock > nfs4_server_t::s_lock
 916  *        mi_recovlock > nfs4_server_lst_lock
 917  *
 918  * The 'mi_oo_list' represents the hash buckets that contain the
 * nfs4_open_owners for this particular mntinfo4.
 920  *
 921  * The 'mi_foo_list' represents the freed nfs4_open_owners for this mntinfo4.
 922  * 'mi_foo_num' is the current number of freed open owners on the list,
 923  * 'mi_foo_max' is the maximum number of freed open owners that are allowable
 924  * on the list.
 925  *
 926  * mi_rootfh and mi_srvparentfh are read-only once created, but that just
 927  * refers to the pointer.  The contents must be updated to keep in sync
 928  * with mi_curr_serv.
 929  *
 930  * The mi_msg_list_lock protects against adding/deleting entries to the
 931  * mi_msg_list, and also the updating/retrieving of mi_lease_period;
 932  *
 933  * 'mi_zone' is initialized at structure creation time, and never
 934  * changes; it may be read without a lock.
 935  *
 936  * mi_zone_node is linkage into the mi4_globals.mig_list, and is
 937  * protected by mi4_globals.mig_list_lock.
 938  *
 939  * If MI4_EPHEMERAL is set in mi_flags, then mi_ephemeral points to an
 940  * ephemeral structure for this ephemeral mount point. It can not be
 941  * NULL. Also, mi_ephemeral_tree points to the root of the ephemeral
 942  * tree.
 943  *
 944  * If MI4_EPHEMERAL is not set in mi_flags, then mi_ephemeral has
 945  * to be NULL. If mi_ephemeral_tree is non-NULL, then this node
 946  * is the enclosing mntinfo4 for the ephemeral tree.
 947  */
struct zone;
struct nfs4_ephemeral;
struct nfs4_ephemeral_tree;
struct nfs4_server;
typedef struct mntinfo4 {
        kmutex_t        mi_lock;        /* protects mntinfo4 fields */
        struct servinfo4 *mi_servers;   /* server list */
        struct servinfo4 *mi_curr_serv; /* current server */
        struct nfs4_sharedfh *mi_rootfh; /* root filehandle */
        struct nfs4_sharedfh *mi_srvparentfh; /* root's parent on server */
        kcondvar_t      mi_failover_cv; /* failover synchronization */
        struct vfs      *mi_vfsp;       /* back pointer to vfs */
        enum vtype      mi_type;        /* file type of the root vnode */
        uint_t          mi_flags;       /* MI4_*; see below */
        uint_t          mi_recovflags;  /* if recovery active; MI4R_*; see below */
        kthread_t       *mi_recovthread; /* active recov thread or NULL */
        uint_t          mi_error;       /* only set/valid when MI4_RECOV_FAIL */
                                        /* is set in mi_flags */
        int             mi_tsize;       /* transfer size (bytes) */
                                        /* really read size */
        int             mi_stsize;      /* server's max transfer size (bytes) */
                                        /* really write size */
        int             mi_timeo;       /* initial timeout in 10th sec */
        int             mi_retrans;     /* times to retry request */
        hrtime_t        mi_acregmin;    /* min time to hold cached file attr */
        hrtime_t        mi_acregmax;    /* max time to hold cached file attr */
        hrtime_t        mi_acdirmin;    /* min time to hold cached dir attr */
        hrtime_t        mi_acdirmax;    /* max time to hold cached dir attr */
        len_t           mi_maxfilesize; /* for pathconf _PC_FILESIZEBITS */
        int             mi_curread;     /* current read size */
        int             mi_curwrite;    /* current write size */
        uint_t          mi_count;       /* ref count */
        /*
         * Async I/O management
         * We have 2 pools of threads working on async I/O:
         *      (i) Threads which work on all async queues. Default number of
         *      threads in this queue is 8. Threads in this pool work on async
         *      queue pointed by mi_async_curr[NFS4_ASYNC_QUEUE]. Number of
         *      active threads in this pool is tracked by
         *      mi_threads[NFS4_ASYNC_QUEUE].
         *      (ii) Threads which work only on page op async queues.
         *      Page ops queue comprises NFS4_PUTAPAGE, NFS4_PAGEIO &
         *      NFS4_COMMIT. Default number of threads in this queue is 2
         *      (NUM_ASYNC_PGOPS_THREADS). Threads in this pool work on async
         *      queue pointed by mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE]. Number
         *      of active threads in this pool is tracked by
         *      mi_threads[NFS4_ASYNC_PGOPS_QUEUE].
         *
         * In addition to above two pools, there is always one thread that
         * handles over-the-wire requests for VOP_INACTIVE.
         */
        struct nfs4_async_reqs *mi_async_reqs[NFS4_ASYNC_TYPES];
        struct nfs4_async_reqs *mi_async_tail[NFS4_ASYNC_TYPES];
        struct nfs4_async_reqs **mi_async_curr[NFS4_MAX_ASYNC_QUEUES];
                                                /* current async queue */
        uint_t          mi_async_clusters[NFS4_ASYNC_TYPES];
        uint_t          mi_async_init_clusters;
        uint_t          mi_async_req_count; /* # outstanding work requests */
        kcondvar_t      mi_async_reqs_cv; /* signaled when there's work */
        ushort_t        mi_threads[NFS4_MAX_ASYNC_QUEUES];
                                        /* number of active async threads */
        ushort_t        mi_max_threads; /* max number of async threads */
        kthread_t       *mi_manager_thread; /* async manager thread id */
        kthread_t       *mi_inactive_thread; /* inactive thread id */
        kcondvar_t      mi_inact_req_cv; /* notify VOP_INACTIVE thread */
        kcondvar_t      mi_async_work_cv[NFS4_MAX_ASYNC_QUEUES];
                                        /* tell workers to work */
        kcondvar_t      mi_async_cv;    /* all pool threads exited */
        kmutex_t        mi_async_lock;  /* protects async fields; see above */
        /*
         * Other stuff
         */
        struct pathcnf  *mi_pathconf;   /* static pathconf kludge */
        rpcprog_t       mi_prog;        /* RPC program number */
        rpcvers_t       mi_vers;        /* RPC program version number */
        char            **mi_rfsnames;  /* mapping to proc names */
        kstat_named_t   *mi_reqs;       /* count of requests */
        clock_t         mi_printftime;  /* last error printf time */
        nfs_rwlock_t    mi_recovlock;   /* separate ops from recovery (v4) */
        time_t          mi_grace_wait;  /* non-zero represents time to wait */
        /* when we switched nfs4_server_t - only for observability purposes */
        time_t          mi_srvsettime;
        nfs_rwlock_t    mi_rename_lock; /* atomic volfh rename  */
        struct nfs4_fname *mi_fname;    /* root fname */
        list_t          mi_lost_state;  /* resend list */
        list_t          mi_bseqid_list; /* bad seqid list */
        /*
         * Client Side Failover stats
         */
        uint_t          mi_noresponse;  /* server not responding count */
        uint_t          mi_failover;    /* failover to new server count */
        uint_t          mi_remap;       /* remap to new server count */
        /*
         * Kstat statistics
         */
        struct kstat    *mi_io_kstats;
        struct kstat    *mi_ro_kstats;
        kstat_t         *mi_recov_ksp;  /* ptr to the recovery kstat */

        /*
         * Volatile fh flags (nfsv4)
         */
        uint32_t        mi_fh_expire_type;      /* server's fh expire type */
        /*
         * Lease Management
         */
        struct mntinfo4 *mi_clientid_next;
        struct mntinfo4 *mi_clientid_prev;
        clientid4       mi_clientid; /* redundant info found in nfs4_server */
        int             mi_open_files;  /* count of open files */
        int             mi_in_recovery; /* count of recovery instances */
        kcondvar_t      mi_cv_in_recov; /* cv for recovery threads */
        /*
         * Open owner stuff.
         */
        struct nfs4_oo_hash_bucket      mi_oo_list[NFS4_NUM_OO_BUCKETS];
        list_t                          mi_foo_list;    /* freed open owners */
        int                             mi_foo_num;     /* current count */
        int                             mi_foo_max;     /* max allowed */
        /*
         * Shared filehandle pool.
         */
        nfs_rwlock_t                    mi_fh_lock;
        avl_tree_t                      mi_filehandles; /* nfs4_sharedfh tree */

        /*
         * Debug message queue.
         */
        list_t                  mi_msg_list;
        int                     mi_msg_count;
        time_t                  mi_lease_period;
                                        /*
                                         * not guaranteed to be accurate.
                                         * only should be used by debug queue.
                                         */
        kmutex_t                mi_msg_list_lock;
        /*
         * Zones support.
         */
        struct zone     *mi_zone;       /* Zone in which FS is mounted */
        zone_ref_t      mi_zone_ref;    /* Reference to aforementioned zone */
        list_node_t     mi_zone_node;  /* linkage into per-zone mi list */

        /*
         * Links for unmounting ephemeral mounts.
         */
        struct nfs4_ephemeral           *mi_ephemeral;
        struct nfs4_ephemeral_tree      *mi_ephemeral_tree;

        uint_t mi_srvset_cnt; /* increment when changing the nfs4_server_t */
        struct nfs4_server *mi_srv; /* backpointer to nfs4_server_t */
        /*
         * Referral related info.
         */
        int             mi_vfs_referral_loop_cnt; /* referral loop counter */
} mntinfo4_t;
1104 
1105 /*
1106  * The values for mi_flags.
1107  *
1108  *      MI4_HARD                 hard or soft mount
1109  *      MI4_PRINTED              responding message printed
1110  *      MI4_INT                  allow INTR on hard mount
1111  *      MI4_DOWN                 server is down
1112  *      MI4_NOAC                 don't cache attributes
1113  *      MI4_NOCTO                no close-to-open consistency
1114  *      MI4_LLOCK                local locking only (no lockmgr)
1115  *      MI4_GRPID                System V group id inheritance
1116  *      MI4_SHUTDOWN             System is rebooting or shutting down
1117  *      MI4_LINK                 server supports link
1118  *      MI4_SYMLINK              server supports symlink
1119  *      MI4_EPHEMERAL_RECURSED   an ephemeral mount being unmounted
1120  *                               due to a recursive call - no need
1121  *                               for additional recursion
1122  *      MI4_ACL                  server supports NFSv4 ACLs
1123  *      MI4_MIRRORMOUNT          is a mirrormount
1124  *      MI4_NOPRINT              don't print messages
1125  *      MI4_DIRECTIO             do direct I/O
 *      MI4_RECOV_ACTIV          filesystem has a recovery thread
1127  *      MI4_REMOVE_ON_LAST_CLOSE remove from server's list
1128  *      MI4_RECOV_FAIL           client recovery failed
1129  *      MI4_PUBLIC               public/url option used
1130  *      MI4_MOUNTING             mount in progress, don't failover
1131  *      MI4_POSIX_LOCK           if server is using POSIX locking
1132  *      MI4_LOCK_DEBUG           cmn_err'd posix lock err msg
1133  *      MI4_DEAD                 zone has released it
1134  *      MI4_INACTIVE_IDLE        inactive thread idle
1135  *      MI4_BADOWNER_DEBUG       badowner error msg per mount
1136  *      MI4_ASYNC_MGR_STOP       tell async manager to die
1137  *      MI4_TIMEDOUT             saw a timeout during zone shutdown
1138  *      MI4_EPHEMERAL            is an ephemeral mount
1139  */
/* mi_flags bits; meanings are documented in the table above. */
#define MI4_HARD                 0x1
#define MI4_PRINTED              0x2
#define MI4_INT                  0x4
#define MI4_DOWN                 0x8
#define MI4_NOAC                 0x10
#define MI4_NOCTO                0x20
/* 0x40 is available */
#define MI4_LLOCK                0x80
#define MI4_GRPID                0x100
#define MI4_SHUTDOWN             0x200
#define MI4_LINK                 0x400
#define MI4_SYMLINK              0x800
#define MI4_EPHEMERAL_RECURSED   0x1000
#define MI4_ACL                  0x2000
/* MI4_MIRRORMOUNT is also defined in nfsstat.c */
#define MI4_MIRRORMOUNT          0x4000
#define MI4_REFERRAL             0x8000
/* 0x10000 is available */
#define MI4_NOPRINT              0x20000
#define MI4_DIRECTIO             0x40000
/* 0x80000 is available */
#define MI4_RECOV_ACTIV          0x100000
#define MI4_REMOVE_ON_LAST_CLOSE 0x200000
#define MI4_RECOV_FAIL           0x400000
#define MI4_PUBLIC               0x800000
#define MI4_MOUNTING             0x1000000
#define MI4_POSIX_LOCK           0x2000000
#define MI4_LOCK_DEBUG           0x4000000
#define MI4_DEAD                 0x8000000
#define MI4_INACTIVE_IDLE        0x10000000
#define MI4_BADOWNER_DEBUG       0x20000000
#define MI4_ASYNC_MGR_STOP       0x40000000
#define MI4_TIMEDOUT             0x80000000

/* An ephemeral mount is either a mirror mount or a referral mount. */
#define MI4_EPHEMERAL           (MI4_MIRRORMOUNT | MI4_REFERRAL)

/* Was the "intr" mount option set (allow signals to interrupt ops)? */
#define INTR4(vp)       (VTOMI4(vp)->mi_flags & MI4_INT)

/* Non-NULL iff more than one server is available, i.e. failover possible. */
#define FAILOVER_MOUNT4(mi)     (mi->mi_servers->sv_next)
1178 
1179 /*
1180  * Recovery flags.
1181  *
1182  * MI4R_NEED_CLIENTID is sort of redundant (it's the nfs4_server_t flag
1183  * that's important), but some flag is needed to indicate that recovery is
1184  * going on for the filesystem.
1185  */
#define MI4R_NEED_CLIENTID      0x1     /* must establish a clientid */
#define MI4R_REOPEN_FILES       0x2     /* reopen this fs's open files */
#define MI4R_NEED_SECINFO       0x4     /* renegotiate security flavor */
#define MI4R_NEED_NEW_SERVER    0x8     /* fail over to a new server */
#define MI4R_REMAP_FILES        0x10    /* remap filehandles */
#define MI4R_SRV_REBOOT         0x20    /* server has rebooted */
#define MI4R_LOST_STATE         0x40    /* resend lost state requests */
#define MI4R_BAD_SEQID          0x80    /* process bad-seqid list */
#define MI4R_MOVED              0x100   /* filesystem has moved */
1195 
/*
 * Hold/release a reference on a mntinfo4_t.  Wrapped in do { } while (0)
 * so each invocation expands to a single statement and stays safe in
 * unbraced if/else bodies; a bare { } block plus the caller's trailing
 * ';' would otherwise break "if (c) MI4_HOLD(mi); else ...".
 */
#define MI4_HOLD(mi)    do {    \
        mi_hold(mi);            \
} while (0)

#define MI4_RELE(mi)    do {    \
        mi_rele(mi);            \
} while (0)
1203 
1204 /*
1205  * vfs pointer to mount info
1206  */
1207 #define VFTOMI4(vfsp)   ((mntinfo4_t *)((vfsp)->vfs_data))
1208 
1209 /*
1210  * vnode pointer to mount info
1211  */
1212 #define VTOMI4(vp)      ((mntinfo4_t *)(((vp)->v_vfsp)->vfs_data))
1213 
1214 /*
1215  * Lease Management
1216  *
1217  * lease_valid is initially set to NFS4_LEASE_NOT_STARTED.  This is when the
1218  * nfs4_server is first created.  lease_valid is then set to
 * NFS4_LEASE_UNINITIALIZED when the renew thread is started.  The extra state
1220  * NFS4_LEASE_NOT_STARTED is needed for client recovery (so we know if a thread
1221  * already exists when we do SETCLIENTID).  lease_valid is then set to
 * NFS4_LEASE_VALID (if it is at NFS4_LEASE_UNINITIALIZED) when a state
 * creating
1223  * operation (OPEN) is done. lease_valid stays at NFS4_LEASE_VALID as long as
1224  * the lease is renewed.  It is set to NFS4_LEASE_INVALID when the lease
1225  * expires.  Client recovery is needed to set the lease back to
1226  * NFS4_LEASE_VALID from NFS4_LEASE_INVALID.
1227  *
1228  * The s_cred is the credential used to mount the first file system for this
1229  * server.  It used as the credential for the renew thread's calls to the
1230  * server.
1231  *
1232  * The renew thread waits on the condition variable cv_thread_exit.  If the cv
1233  * is signalled, then the thread knows it must check s_thread_exit to see if
1234  * it should exit.  The cv is signaled when the last file system is unmounted
1235  * from a particular server.  s_thread_exit is set to 0 upon thread startup,
1236  * and set to NFS4_THREAD_EXIT, when the last file system is unmounted thereby
1237  * telling the thread to exit.  s_thread_exit is needed to avoid spurious
1238  * wakeups.
1239  *
1240  * state_ref_count is incremented every time a new file is opened and
1241  * decremented every time a file is closed otw.  This keeps track of whether
1242  * the nfs4_server has state associated with it or not.
1243  *
1244  * s_refcnt is the reference count for storage management of the struct
1245  * itself.
1246  *
1247  * mntinfo4_list points to the doubly linked list of mntinfo4s that share
1248  * this nfs4_server (ie: <clientid, saddr> pair) in the current zone.  This is
1249  * needed for a nfs4_server to get a mntinfo4 for use in rfs4call.
1250  *
1251  * s_recovlock is used to synchronize recovery operations.  The thread
1252  * that is recovering the client must acquire it as a writer.  If the
1253  * thread is using the clientid (including recovery operations on other
1254  * state), acquire it as a reader.
1255  *
1256  * The 's_otw_call_count' keeps track of the number of outstanding over the
1257  * wire requests for this structure.  The struct will not go away as long
1258  * as this is non-zero (or s_refcnt is non-zero).
1259  *
 * The 's_cv_otw_count' is used in conjunction with the 's_otw_call_count'
 * variable to let the renew thread know when an outstanding otw request has
1262  * finished.
1263  *
1264  * 'zoneid' and 'zone_globals' are set at creation of this structure
1265  * and are read-only after that; no lock is required to read them.
1266  *
1267  * s_lock protects: everything except cv_thread_exit and s_recovlock.
1268  *
1269  * s_program is used as the index into the nfs4_callback_globals's
1270  * nfs4prog2server table.  When a callback request comes in, we can
1271  * use that request's program number (minus NFS4_CALLBACK) as an index
1272  * into the nfs4prog2server.  That entry will hold the nfs4_server_t ptr.
1273  * We can then access that nfs4_server_t and its 's_deleg_list' (its list of
1274  * delegated rnode4_ts).
1275  *
1276  * Lock order:
1277  * nfs4_server::s_lock > mntinfo4::mi_lock
1278  * nfs_rtable4_lock > s_lock
1279  * nfs4_server_lst_lock > s_lock
1280  * s_recovlock > s_lock
1281  */
1282 struct nfs4_callback_globals;
1283 
typedef struct nfs4_server {
        struct nfs4_server      *forw;          /* doubly-linked server list */
        struct nfs4_server      *back;
        struct netbuf           saddr;          /* server's address */
        uint_t                  s_flags;        /* N4S_*; see below */
        uint_t                  s_refcnt;       /* storage-management refcount */
        clientid4               clientid;       /* what we get from server */
        nfs_client_id4          clidtosend;     /* what we send to server */
        mntinfo4_t              *mntinfo4_list; /* mounts sharing this server */
        int                     lease_valid;    /* NFS4_LEASE_* state */
        time_t                  s_lease_time;   /* lease interval */
        time_t                  last_renewal_time; /* last successful renewal */
        timespec_t              propagation_delay;
        cred_t                  *s_cred;        /* first mount's cred; used */
                                                /* for renew-thread otw calls */
        kcondvar_t              cv_thread_exit; /* wakes renew thread to */
                                                /* check s_thread_exit */
        int                     s_thread_exit;  /* 0 or NFS4_THREAD_EXIT */
        int                     state_ref_count; /* otw opens minus closes */
        int                     s_otw_call_count; /* outstanding otw requests */
        kcondvar_t              s_cv_otw_count; /* signalled as otw calls end */
        kcondvar_t              s_clientid_pend;
        kmutex_t                s_lock;         /* protects everything except */
                                                /* cv_thread_exit, s_recovlock */
        list_t                  s_deleg_list;   /* delegated rnode4_ts */
        rpcprog_t               s_program;      /* index into nfs4prog2server */
        nfs_rwlock_t            s_recovlock;    /* recovery synchronization */
        kcondvar_t              wait_cb_null; /* used to wait for CB_NULL */
        zoneid_t                zoneid; /* zone using this nfs4_server_t */
        struct nfs4_callback_globals *zone_globals;     /* globals */
} nfs4_server_t;
1312 
/* nfs4_server flags (s_flags); protected by nfs4_server_t::s_lock */
#define N4S_CLIENTID_SET        0x1     /* server has our clientid */
#define N4S_CLIENTID_PEND       0x2     /* server doesn't have clientid */
#define N4S_CB_PINGED           0x4     /* server has sent us a CB_NULL */
#define N4S_CB_WAITER           0x8     /* is/has wait{ing/ed} for cb_null */
#define N4S_INSERTED            0x10    /* list has reference for server */
#define N4S_BADOWNER_DEBUG      0x20    /* bad owner err msg per client */

#define N4S_CB_PAUSE_TIME       10000   /* Amount of time to pause (10ms) */
1322 
/* Carries a lease interval (seconds) to/from lease-time processing. */
struct lease_time_arg {
        time_t  lease_time;
};

/* When a delegation should be returned to the server. */
enum nfs4_delegreturn_policy {
        IMMEDIATE,      /* return immediately */
        FIRSTCLOSE,     /* return on first close */
        LASTCLOSE,      /* return on last close */
        INACTIVE        /* return at VOP_INACTIVE time */
};
1333 
1334 /*
1335  * Operation hints for the recovery framework (mostly).
1336  *
1337  * EXCEPTIONS:
1338  * OH_ACCESS, OH_GETACL, OH_GETATTR, OH_LOOKUP, OH_READDIR
1339  *      These hints exist to allow user visit/readdir a R4SRVSTUB dir.
1340  *      (dir represents the root of a server fs that has not yet been
1341  *      mounted at client)
1342  */
typedef enum {
        OH_OTHER,       /* no special handling */
        OH_READ,
        OH_WRITE,
        OH_COMMIT,
        OH_VFH_RENAME,  /* volatile filehandle rename */
        OH_MOUNT,
        OH_CLOSE,
        OH_LOCKU,
        OH_DELEGRETURN,
        OH_ACCESS,      /* R4SRVSTUB exception; see above */
        OH_GETACL,      /* R4SRVSTUB exception; see above */
        OH_GETATTR,     /* R4SRVSTUB exception; see above */
        OH_LOOKUP,      /* R4SRVSTUB exception; see above */
        OH_READDIR      /* R4SRVSTUB exception; see above */
} nfs4_op_hint_t;
1359 
1360 /*
1361  * This data structure is used to track ephemeral mounts for both
1362  * mirror mounts and referrals.
1363  *
1364  * Note that each nfs4_ephemeral can only have one other nfs4_ephemeral
1365  * pointing at it. So we don't need two backpointers to walk
1366  * back up the tree.
1367  *
1368  * An ephemeral tree is pointed to by an enclosing non-ephemeral
1369  * mntinfo4. The root is also pointed to by its ephemeral
1370  * mntinfo4. ne_child will get us back to it, while ne_prior
1371  * will get us back to the non-ephemeral mntinfo4. This is an
1372  * edge case we will need to be wary of when walking back up the
1373  * tree.
1374  *
1375  * The way we handle this edge case is to have ne_prior be NULL
1376  * for the root nfs4_ephemeral node.
1377  */
typedef struct nfs4_ephemeral {
        mntinfo4_t              *ne_mount;      /* who encloses us */
        struct nfs4_ephemeral   *ne_child;      /* first child node */
        struct nfs4_ephemeral   *ne_peer;       /* next sibling */
        struct nfs4_ephemeral   *ne_prior;      /* who points at us; NULL */
                                                /* for the root node */
        time_t                  ne_ref_time;    /* time last referenced */
        uint_t                  ne_mount_to;    /* timeout at */
        int                     ne_state;       /* NFS4_EPHEMERAL_*; used */
                                                /* to traverse */
} nfs4_ephemeral_t;
1387 
1388 /*
1389  * State for the node (set in ne_state):
1390  */
#define NFS4_EPHEMERAL_OK               0x0     /* nothing pending */
#define NFS4_EPHEMERAL_VISIT_CHILD      0x1     /* descend to first child */
#define NFS4_EPHEMERAL_VISIT_SIBLING    0x2     /* move to next sibling */
#define NFS4_EPHEMERAL_PROCESS_ME       0x4     /* process this node */
#define NFS4_EPHEMERAL_CHILD_ERROR      0x8     /* error processing a child */
#define NFS4_EPHEMERAL_PEER_ERROR       0x10    /* error processing a peer */
1397 
1398 /*
1399  * These are the locks used in processing ephemeral data:
1400  *
1401  * mi->mi_lock
1402  *
1403  * net->net_tree_lock
1404  *     This lock is used to gate all tree operations.
1405  *     If it is held, then no other process may
1406  *     traverse the tree. This allows us to not
1407  *     throw a hold on each vfs_t in the tree.
1408  *     Can be held for a "long" time.
1409  *
1410  * net->net_cnt_lock
1411  *     Used to protect refcnt and status.
1412  *     Must be held for a really short time.
1413  *
1414  * nfs4_ephemeral_thread_lock
1415  *     Is only held to create the harvester for the zone.
1416  *     There is no ordering imposed on it.
1417  *     Held for a really short time.
1418  *
1419  * Some further detail on the interactions:
1420  *
1421  * net_tree_lock controls access to net_root. Access needs to first be
1422  * attempted in a non-blocking check.
1423  *
1424  * net_cnt_lock controls access to net_refcnt and net_status. It must only be
1425  * held for very short periods of time, unless the refcnt is 0 and the status
1426  * is INVALID.
1427  *
1428  * Before a caller can grab net_tree_lock, it must first grab net_cnt_lock
1429  * to bump the net_refcnt. It then releases it and does the action specific
1430  * algorithm to get the net_tree_lock. Once it has that, then it is okay to
1431  * grab the net_cnt_lock and change the status. The status can only be
1432  * changed if the caller has the net_tree_lock held as well.
1433  *
1434  * Note that the initial grab of net_cnt_lock must occur whilst
1435  * mi_lock is being held. This prevents stale data in that if the
1436  * ephemeral tree is non-NULL, then the harvester can not remove
1437  * the tree from the mntinfo node until it grabs that lock. I.e.,
1438  * we get the pointer to the tree and hold the lock atomically
1439  * with respect to being in mi_lock.
1440  *
1441  * When a caller is done with net_tree_lock, it can decrement the net_refcnt
1442  * either before it releases net_tree_lock or after.
1443  *
1444  * In either event, to decrement net_refcnt, it must hold net_cnt_lock.
1445  *
1446  * Note that the overall locking scheme for the nodes is to control access
1447  * via the tree. The current scheme could easily be extended such that
1448  * the enclosing root referenced a "forest" of trees. The underlying trees
1449  * would be autonomous with respect to locks.
1450  *
1451  * Note that net_next is controlled by external locks
1452  * particular to the data structure that the tree is being added to.
1453  */
1454 typedef struct nfs4_ephemeral_tree {
1455         mntinfo4_t                      *net_mount;
1456         nfs4_ephemeral_t                *net_root;
1457         struct nfs4_ephemeral_tree      *net_next;
1458         kmutex_t                        net_tree_lock;
1459         kmutex_t                        net_cnt_lock;
1460         uint_t                          net_status;
1461         uint_t                          net_refcnt;
1462 } nfs4_ephemeral_tree_t;
1463 
1464 /*
1465  * State for the tree (set in net_status):
1466  */
1467 #define NFS4_EPHEMERAL_TREE_OK          0x0
1468 #define NFS4_EPHEMERAL_TREE_BUILDING    0x1
1469 #define NFS4_EPHEMERAL_TREE_DEROOTING   0x2
1470 #define NFS4_EPHEMERAL_TREE_INVALID     0x4
1471 #define NFS4_EPHEMERAL_TREE_MOUNTING    0x8
1472 #define NFS4_EPHEMERAL_TREE_UMOUNTING   0x10
1473 #define NFS4_EPHEMERAL_TREE_LOCKED      0x20
1474 
1475 #define NFS4_EPHEMERAL_TREE_PROCESSING  (NFS4_EPHEMERAL_TREE_DEROOTING | \
1476         NFS4_EPHEMERAL_TREE_INVALID | NFS4_EPHEMERAL_TREE_UMOUNTING | \
1477         NFS4_EPHEMERAL_TREE_LOCKED)
1478 
1479 /*
1480  * This macro evaluates to non-zero if the given op releases state at the
1481  * server.
1482  */
1483 #define OH_IS_STATE_RELE(op)    ((op) == OH_CLOSE || (op) == OH_LOCKU || \
1484                                 (op) == OH_DELEGRETURN)
1485 
1486 #ifdef _KERNEL
1487 
1488 extern void     nfs4_async_manager(struct vfs *);
1489 extern void     nfs4_async_manager_stop(struct vfs *);
1490 extern void     nfs4_async_stop(struct vfs *);
1491 extern int      nfs4_async_stop_sig(struct vfs *);
1492 extern int      nfs4_async_readahead(vnode_t *, u_offset_t, caddr_t,
1493                                 struct seg *, cred_t *,
1494                                 void (*)(vnode_t *, u_offset_t,
1495                                 caddr_t, struct seg *, cred_t *));
1496 extern int      nfs4_async_putapage(vnode_t *, page_t *, u_offset_t, size_t,
1497                                 int, cred_t *, int (*)(vnode_t *, page_t *,
1498                                 u_offset_t, size_t, int, cred_t *));
1499 extern int      nfs4_async_pageio(vnode_t *, page_t *, u_offset_t, size_t,
1500                                 int, cred_t *, int (*)(vnode_t *, page_t *,
1501                                 u_offset_t, size_t, int, cred_t *));
1502 extern void     nfs4_async_commit(vnode_t *, page_t *, offset3, count3,
1503                                 cred_t *, void (*)(vnode_t *, page_t *,
1504                                 offset3, count3, cred_t *));
1505 extern void     nfs4_async_inactive(vnode_t *, cred_t *);
1506 extern void     nfs4_inactive_thread(mntinfo4_t *mi);
1507 extern void     nfs4_inactive_otw(vnode_t *, cred_t *);
1508 extern int      nfs4_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *);
1509 
1510 extern int      nfs4_setopts(vnode_t *, model_t, struct nfs_args *);
1511 extern void     nfs4_mnt_kstat_init(struct vfs *);
1512 
1513 extern void     rfs4call(struct mntinfo4 *, struct COMPOUND4args_clnt *,
1514                         struct COMPOUND4res_clnt *, cred_t *, int *, int,
1515                         nfs4_error_t *);
1516 extern void     nfs4_acl_fill_cache(struct rnode4 *, vsecattr_t *);
1517 extern int      nfs4_attr_otw(vnode_t *, nfs4_tag_type_t,
1518                                 nfs4_ga_res_t *, bitmap4, cred_t *);
1519 
1520 extern void     nfs4_attrcache_noinval(vnode_t *, nfs4_ga_res_t *, hrtime_t);
1521 extern void     nfs4_attr_cache(vnode_t *, nfs4_ga_res_t *,
1522                                 hrtime_t, cred_t *, int,
1523                                 change_info4 *);
1524 extern void     nfs4_purge_rddir_cache(vnode_t *);
1525 extern void     nfs4_invalidate_pages(vnode_t *, u_offset_t, cred_t *);
1526 extern void     nfs4_purge_caches(vnode_t *, int, cred_t *, int);
1527 extern void     nfs4_purge_stale_fh(int, vnode_t *, cred_t *);
1528 extern void     nfs4_flush_pages(vnode_t *vp, cred_t *cr);
1529 
1530 extern void     nfs4rename_update(vnode_t *, vnode_t *, nfs_fh4 *, char *);
1531 extern void     nfs4_update_paths(vnode_t *, char *, vnode_t *, char *,
1532                         vnode_t *);
1533 
1534 extern void     nfs4args_lookup_free(nfs_argop4 *, int);
1535 extern void     nfs4args_copen_free(OPEN4cargs *);
1536 
1537 extern void     nfs4_printfhandle(nfs4_fhandle_t *);
1538 
1539 extern void     nfs_free_mi4(mntinfo4_t *);
1540 extern void     sv4_free(servinfo4_t *);
1541 extern void     nfs4_mi_zonelist_add(mntinfo4_t *);
1542 extern int      nfs4_mi_zonelist_remove(mntinfo4_t *);
1543 extern int      nfs4_secinfo_recov(mntinfo4_t *, vnode_t *, vnode_t *);
1544 extern void     nfs4_secinfo_init(void);
1545 extern void     nfs4_secinfo_fini(void);
1546 extern int      nfs4_secinfo_path(mntinfo4_t *, cred_t *, int);
1547 extern int      nfs4_secinfo_vnode_otw(vnode_t *, char *, cred_t *);
1548 extern void     secinfo_free(sv_secinfo_t *);
1549 extern void     save_mnt_secinfo(servinfo4_t *);
1550 extern void     check_mnt_secinfo(servinfo4_t *, vnode_t *);
1551 extern int      vattr_to_fattr4(vattr_t *, vsecattr_t *, fattr4 *, int,
1552                                 enum nfs_opnum4, bitmap4 supp_mask);
1553 extern int      nfs4_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
1554                         int, cred_t *);
1555 extern void     nfs4_write_error(vnode_t *, int, cred_t *);
1556 extern void     nfs4_lockcompletion(vnode_t *, int);
1557 extern bool_t   nfs4_map_lost_lock_conflict(vnode_t *);
1558 extern int      vtodv(vnode_t *, vnode_t **, cred_t *, bool_t);
1559 extern int      vtoname(vnode_t *, char *, ssize_t);
1560 extern void     nfs4open_confirm(vnode_t *, seqid4*, stateid4 *, cred_t *,
1561                     bool_t, bool_t *, nfs4_open_owner_t *, bool_t,
1562                     nfs4_error_t *, int *);
1563 extern void     nfs4_error_zinit(nfs4_error_t *);
1564 extern void     nfs4_error_init(nfs4_error_t *, int);
1565 extern void     nfs4_free_args(struct nfs_args *);
1566 
1567 extern void     mi_hold(mntinfo4_t *);
1568 extern void     mi_rele(mntinfo4_t *);
1569 
1570 extern vnode_t  *find_referral_stubvp(vnode_t *, char *, cred_t *);
1571 extern int       nfs4_setup_referral(vnode_t *, char *, vnode_t **, cred_t *);
1572 
1573 extern sec_data_t       *copy_sec_data(sec_data_t *);
1574 extern gss_clntdata_t   *copy_sec_data_gss(gss_clntdata_t *);
1575 
1576 #ifdef DEBUG
1577 extern int      nfs4_consistent_type(vnode_t *);
1578 #endif
1579 
1580 extern void     nfs4_init_dot_entries(void);
1581 extern void     nfs4_destroy_dot_entries(void);
1582 extern struct nfs4_callback_globals     *nfs4_get_callback_globals(void);
1583 
1584 extern struct nfs4_server nfs4_server_lst;
1585 
1586 extern volatile clock_t nfs_write_error_interval;
1587 
1588 #endif /* _KERNEL */
1589 
1590 /*
1591  * Flags for nfs4getfh_otw.
1592  */
1593 
1594 #define NFS4_GETFH_PUBLIC       0x01
1595 #define NFS4_GETFH_NEEDSOP      0x02
1596 
1597 /*
1598  * Found through rnodes.
1599  *
 * The os_open_ref_count keeps track of the number of open file descriptor
 * references on this data structure.  It will be bumped for any successful
1602  * OTW OPEN call and any OPEN call that determines the OTW call is not
1603  * necessary and the open stream hasn't just been created (see
1604  * nfs4_is_otw_open_necessary).
1605  *
1606  * os_mapcnt is a count of the number of mmapped pages for a particular
1607  * open stream; this in conjunction w/ os_open_ref_count is used to
1608  * determine when to do a close to the server.  This is necessary because
 * of the semantics of doing open, mmap, close; the OTW close must wait
 * until all open and mmap references have vanished.
1611  *
1612  * 'os_valid' tells us whether this structure is about to be freed or not,
1613  * if it is then don't return it in find_open_stream().
1614  *
1615  * 'os_final_close' is set when a CLOSE OTW was attempted.  This is needed
1616  * so we can properly count the os_open_ref_count in cases where we VOP_CLOSE
1617  * without a VOP_OPEN, and have nfs4_inactive() drive the OTW CLOSE.  It
1618  * also helps differentiate the VOP_OPEN/VN_RELE case from the VOP_CLOSE
1619  * that tried to close OTW but failed, and left the state cleanup to
1620  * nfs4_inactive/CLOSE_FORCE.
1621  *
1622  * 'os_force_close' is used to let us know if an intervening thread came
1623  * and reopened the open stream after we decided to issue a CLOSE_FORCE,
1624  * but before we could actually process the CLOSE_FORCE.
1625  *
1626  * 'os_pending_close' is set when an over-the-wire CLOSE is deferred to the
1627  * lost state queue.
1628  *
1629  * 'open_stateid' is set to the last open stateid returned by the server unless
1630  * 'os_delegation' is 1, in which case 'open_stateid' refers to the
1631  * delegation stateid returned by the server.  This is used in cases where the
1632  * client tries to OPEN a file but already has a suitable delegation, so we
1633  * just stick the delegation stateid in the open stream.
1634  *
1635  * os_dc_openacc are open access bits which have been granted to the
1636  * open stream by virtue of a delegation, but which have not been seen
1637  * by the server.  This applies even if the open stream does not have
1638  * os_delegation set.  These bits are used when setting file locks to
1639  * determine whether an open with CLAIM_DELEGATE_CUR needs to be done
1640  * before the lock request can be sent to the server.  See
1641  * nfs4frlock_check_deleg().
1642  *
1643  * 'os_mmap_read/write' keep track of the read and write access our memory
1644  * maps require.  We need to keep track of this so we can provide the proper
1645  * access bits in the open/mmap/close/reboot/reopen case.
1646  *
1647  * 'os_failed_reopen' tells us that we failed to successfully reopen this
1648  * open stream; therefore, we should not use this open stateid as it is
1649  * not valid anymore. This flag is also used to indicate an unsuccessful
1650  * attempt to reopen a delegation open stream with CLAIM_DELEGATE_CUR.
1651  *
1652  * If 'os_orig_oo_name' is different than os_open_owner's oo_name
1653  * then this tells us that this open stream's open owner used a
1654  * bad seqid (that is, got NFS4ERR_BAD_SEQID).  If different, this open
1655  * stream will no longer be used for future OTW state releasing calls.
1656  *
1657  * Lock ordering:
1658  * rnode4_t::r_os_lock > os_sync_lock
1659  * os_sync_lock > rnode4_t::r_statelock
1660  * os_sync_lock > rnode4_t::r_statev4_lock
1661  * os_sync_lock > mntinfo4_t::mi_lock (via hold over rfs4call)
1662  *
1663  * The 'os_sync_lock' protects:
1664  *      open_stateid
1665  *      os_dc_openacc
1666  *      os_delegation
1667  *      os_failed_reopen
1668  *      os_final_close
1669  *      os_force_close
1670  *      os_mapcnt
1671  *      os_mmap_read
1672  *      os_mmap_write
1673  *      os_open_ref_count
1674  *      os_pending_close
1675  *      os_share_acc_read
1676  *      os_share_acc_write
1677  *      os_share_deny_none
1678  *      os_share_deny_read
1679  *      os_share_deny_write
1680  *      os_ref_count
1681  *      os_valid
1682  *
1683  * The rnode4_t::r_os_lock protects:
1684  *      os_node
1685  *
1686  * These fields are set at creation time and
1687  * read only after that:
1688  *      os_open_owner
1689  *      os_orig_oo_name
1690  */
1691 typedef struct nfs4_open_stream {
1692         uint64_t                os_share_acc_read;
1693         uint64_t                os_share_acc_write;
1694         uint64_t                os_mmap_read;
1695         uint64_t                os_mmap_write;
1696         uint32_t                os_share_deny_none;
1697         uint32_t                os_share_deny_read;
1698         uint32_t                os_share_deny_write;
1699         stateid4                open_stateid;
1700         int                     os_dc_openacc;
1701         int                     os_ref_count;
1702         unsigned                os_valid:1;
1703         unsigned                os_delegation:1;
1704         unsigned                os_final_close:1;
1705         unsigned                os_pending_close:1;
1706         unsigned                os_failed_reopen:1;
1707         unsigned                os_force_close:1;
1708         int                     os_open_ref_count;
1709         long                    os_mapcnt;
1710         list_node_t             os_node;
1711         struct nfs4_open_owner  *os_open_owner;
1712         uint64_t                os_orig_oo_name;
1713         kmutex_t                os_sync_lock;
1714 } nfs4_open_stream_t;
1715 
1716 /*
1717  * This structure describes the format of the lock_owner_name
1718  * field of the lock owner.
1719  */
1720 
1721 typedef struct nfs4_lo_name {
1722         uint64_t        ln_seq_num;
1723         pid_t           ln_pid;
1724 } nfs4_lo_name_t;
1725 
1726 /*
1727  * Flags for lo_flags.
1728  */
1729 #define NFS4_LOCK_SEQID_INUSE   0x1
1730 #define NFS4_BAD_SEQID_LOCK     0x2
1731 
1732 /*
1733  * The lo_prev_rnode and lo_next_rnode are for a circular list that hangs
1734  * off the rnode.  If the links are NULL it means this object is not on the
1735  * list.
1736  *
1737  * 'lo_pending_rqsts' is non-zero if we ever tried to send a request and
1738  * didn't get a response back.  This is used to figure out if we have
1739  * possible remote v4 locks, so that we can clean up at process exit.  In
1740  * theory, the client should be able to figure out if the server received
1741  * the request (based on what seqid works), so maybe we can get rid of this
1742  * flag someday.
1743  *
1744  * 'lo_ref_count' tells us how many processes/threads are using this data
1745  * structure.  The rnode's list accounts for one reference.
1746  *
1747  * 'lo_just_created' is set to NFS4_JUST_CREATED when we first create the
1748  * data structure.  It is then set to NFS4_PERM_CREATED when a lock request
1749  * is successful using this lock owner structure.  We need to keep 'temporary'
1750  * lock owners around so we can properly keep the lock seqid synchronization
1751  * when multiple processes/threads are trying to create the lock owner for the
1752  * first time (especially with the DENIED error case).  Once
1753  * 'lo_just_created' is set to NFS4_PERM_CREATED, it doesn't change.
1754  *
1755  * 'lo_valid' tells us whether this structure is about to be freed or not,
1756  * if it is then don't return it from find_lock_owner().
1757  *
1758  * Retrieving and setting of 'lock_seqid' is protected by the
1759  * NFS4_LOCK_SEQID_INUSE flag.  Waiters for NFS4_LOCK_SEQID_INUSE should
1760  * use 'lo_cv_seqid_sync'.
1761  *
1762  * The setting of 'lock_stateid' is protected by the
1763  * NFS4_LOCK_SEQID_INUSE flag and 'lo_lock'.  The retrieving of the
1764  * 'lock_stateid' is protected by 'lo_lock', with the additional
1765  * requirement that the calling function can handle NFS4ERR_OLD_STATEID and
 * NFS4ERR_BAD_STATEID as appropriate.
1767  *
1768  * The setting of NFS4_BAD_SEQID_LOCK to lo_flags tells us whether this lock
1769  * owner used a bad seqid (that is, got NFS4ERR_BAD_SEQID).  With this set,
1770  * this lock owner will no longer be used for future OTW calls.  Once set,
1771  * it is never unset.
1772  *
1773  * Lock ordering:
1774  * rnode4_t::r_statev4_lock > lo_lock
1775  */
1776 typedef struct nfs4_lock_owner {
1777         struct nfs4_lock_owner  *lo_next_rnode;
1778         struct nfs4_lock_owner  *lo_prev_rnode;
1779         int                     lo_pid;
1780         stateid4                lock_stateid;
1781         seqid4                  lock_seqid;
1782         /*
1783          * Fix this to always be 12 bytes
1784          */
1785         nfs4_lo_name_t          lock_owner_name;
1786         int                     lo_ref_count;
1787         int                     lo_valid;
1788         int                     lo_pending_rqsts;
1789         int                     lo_just_created;
1790         int                     lo_flags;
1791         kcondvar_t              lo_cv_seqid_sync;
1792         kmutex_t                lo_lock;
1793         kthread_t               *lo_seqid_holder; /* debugging aid */
1794 } nfs4_lock_owner_t;
1795 
1796 /* for nfs4_lock_owner_t lookups */
1797 typedef enum {LOWN_ANY, LOWN_VALID_STATEID} lown_which_t;
1798 
1799 /* Number of times to retry a call that fails with state independent error */
1800 #define NFS4_NUM_RECOV_RETRIES  3
1801 
1802 typedef enum {
1803         NO_SID,
1804         DEL_SID,
1805         LOCK_SID,
1806         OPEN_SID,
1807         SPEC_SID
1808 } nfs4_stateid_type_t;
1809 
1810 typedef struct nfs4_stateid_types {
1811         stateid4 d_sid;
1812         stateid4 l_sid;
1813         stateid4 o_sid;
1814         nfs4_stateid_type_t cur_sid_type;
1815 } nfs4_stateid_types_t;
1816 
1817 /*
1818  * Per-zone data for dealing with callbacks.  Included here solely for the
1819  * benefit of MDB.
1820  */
1821 struct nfs4_callback_stats {
1822         kstat_named_t   delegations;
1823         kstat_named_t   cb_getattr;
1824         kstat_named_t   cb_recall;
1825         kstat_named_t   cb_null;
1826         kstat_named_t   cb_dispatch;
1827         kstat_named_t   delegaccept_r;
1828         kstat_named_t   delegaccept_rw;
1829         kstat_named_t   delegreturn;
1830         kstat_named_t   callbacks;
1831         kstat_named_t   claim_cur;
1832         kstat_named_t   claim_cur_ok;
1833         kstat_named_t   recall_trunc;
1834         kstat_named_t   recall_failed;
1835         kstat_named_t   return_limit_write;
1836         kstat_named_t   return_limit_addmap;
1837         kstat_named_t   deleg_recover;
1838         kstat_named_t   cb_illegal;
1839 };
1840 
1841 struct nfs4_callback_globals {
1842         kmutex_t nfs4_cb_lock;
1843         kmutex_t nfs4_dlist_lock;
1844         int nfs4_program_hint;
1845         /* this table maps the program number to the nfs4_server structure */
1846         struct nfs4_server **nfs4prog2server;
1847         list_t nfs4_dlist;
1848         list_t nfs4_cb_ports;
1849         struct nfs4_callback_stats nfs4_callback_stats;
1850 #ifdef DEBUG
1851         int nfs4_dlistadd_c;
1852         int nfs4_dlistclean_c;
1853 #endif
1854 };
1855 
1856 typedef enum {
1857         CLOSE_NORM,
1858         CLOSE_DELMAP,
1859         CLOSE_FORCE,
1860         CLOSE_RESEND,
1861         CLOSE_AFTER_RESEND
1862 } nfs4_close_type_t;
1863 
1864 /*
1865  * Structure to hold the bad seqid information that is passed
1866  * to the recovery framework.
1867  */
1868 typedef struct nfs4_bseqid_entry {
1869         nfs4_open_owner_t       *bs_oop;
1870         nfs4_lock_owner_t       *bs_lop;
1871         vnode_t                 *bs_vp;
1872         pid_t                   bs_pid;
1873         nfs4_tag_type_t         bs_tag;
1874         seqid4                  bs_seqid;
1875         list_node_t             bs_node;
1876 } nfs4_bseqid_entry_t;
1877 
1878 #ifdef _KERNEL
1879 
1880 extern void     nfs4close_one(vnode_t *, nfs4_open_stream_t *, cred_t *, int,
1881                     nfs4_lost_rqst_t *, nfs4_error_t *, nfs4_close_type_t,
1882                     size_t, uint_t, uint_t);
1883 extern void     nfs4close_notw(vnode_t *, nfs4_open_stream_t *, int *);
1884 extern void     nfs4_set_lock_stateid(nfs4_lock_owner_t *, stateid4);
1885 extern void     open_owner_hold(nfs4_open_owner_t *);
1886 extern void     open_owner_rele(nfs4_open_owner_t *);
1887 extern nfs4_open_stream_t       *find_or_create_open_stream(nfs4_open_owner_t *,
1888                                         struct rnode4 *, int *);
1889 extern nfs4_open_stream_t *find_open_stream(nfs4_open_owner_t *,
1890                                 struct rnode4 *);
1891 extern nfs4_open_stream_t *create_open_stream(nfs4_open_owner_t *oop,
1892                                 struct rnode4 *rp);
1893 extern void     open_stream_hold(nfs4_open_stream_t *);
1894 extern void     open_stream_rele(nfs4_open_stream_t *, struct rnode4 *);
1895 extern int      nfs4close_all(vnode_t *, cred_t *);
1896 extern void     lock_owner_hold(nfs4_lock_owner_t *);
1897 extern void     lock_owner_rele(nfs4_lock_owner_t *);
1898 extern nfs4_lock_owner_t *create_lock_owner(struct rnode4 *, pid_t);
1899 extern nfs4_lock_owner_t *find_lock_owner(struct rnode4 *, pid_t, lown_which_t);
1900 extern void     nfs4_rnode_remove_lock_owner(struct rnode4 *,
1901                         nfs4_lock_owner_t *);
1902 extern void     nfs4_flush_lock_owners(struct rnode4 *);
1903 extern void nfs4_setlockowner_args(lock_owner4 *, struct rnode4 *, pid_t);
1904 extern void     nfs4_set_open_seqid(seqid4, nfs4_open_owner_t *,
1905                     nfs4_tag_type_t);
1906 extern void     nfs4_set_lock_seqid(seqid4, nfs4_lock_owner_t *);
1907 extern void     nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *,
1908                     nfs4_tag_type_t);
1909 extern void     nfs4_end_open_seqid_sync(nfs4_open_owner_t *);
1910 extern int      nfs4_start_open_seqid_sync(nfs4_open_owner_t *, mntinfo4_t *);
1911 extern void     nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *);
1912 extern int      nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *, mntinfo4_t *);
1913 extern void     nfs4_setup_lock_args(nfs4_lock_owner_t *, nfs4_open_owner_t *,
1914                         nfs4_open_stream_t *, clientid4, locker4 *);
1915 extern void     nfs4_destroy_open_owner(nfs4_open_owner_t *);
1916 
1917 extern void             nfs4_renew_lease_thread(nfs4_server_t *);
1918 extern nfs4_server_t    *find_nfs4_server(mntinfo4_t *);
1919 extern nfs4_server_t    *find_nfs4_server_all(mntinfo4_t *, int all);
1920 extern nfs4_server_t    *new_nfs4_server(servinfo4_t *, cred_t *);
1921 extern void             nfs4_mark_srv_dead(nfs4_server_t *);
1922 extern nfs4_server_t    *servinfo4_to_nfs4_server(servinfo4_t *);
1923 extern void             nfs4_inc_state_ref_count(mntinfo4_t *);
1924 extern void             nfs4_inc_state_ref_count_nolock(nfs4_server_t *,
1925                                 mntinfo4_t *);
1926 extern void             nfs4_dec_state_ref_count(mntinfo4_t *);
1927 extern void             nfs4_dec_state_ref_count_nolock(nfs4_server_t *,
1928                                 mntinfo4_t *);
1929 extern clientid4        mi2clientid(mntinfo4_t *);
1930 extern int              nfs4_server_in_recovery(nfs4_server_t *);
1931 extern bool_t           nfs4_server_vlock(nfs4_server_t *, int);
1932 extern nfs4_open_owner_t *create_open_owner(cred_t *, mntinfo4_t *);
1933 extern uint64_t         nfs4_get_new_oo_name(void);
1934 extern nfs4_open_owner_t *find_open_owner(cred_t *, int, mntinfo4_t *);
1935 extern nfs4_open_owner_t *find_open_owner_nolock(cred_t *, int, mntinfo4_t *);
1936 extern void     nfs4frlock(nfs4_lock_call_type_t, vnode_t *, int, flock64_t *,
1937                         int, u_offset_t, cred_t *, nfs4_error_t *,
1938                         nfs4_lost_rqst_t *, int *);
1939 extern void     nfs4open_dg_save_lost_rqst(int, nfs4_lost_rqst_t *,
1940                     nfs4_open_owner_t *, nfs4_open_stream_t *, cred_t *,
1941                     vnode_t *, int, int);
1942 extern void     nfs4_open_downgrade(int, int, nfs4_open_owner_t *,
1943                     nfs4_open_stream_t *, vnode_t *, cred_t *,
1944                     nfs4_lost_rqst_t *, nfs4_error_t *, cred_t **, seqid4 *);
1945 extern seqid4   nfs4_get_open_seqid(nfs4_open_owner_t *);
1946 extern cred_t   *nfs4_get_otw_cred(cred_t *, mntinfo4_t *, nfs4_open_owner_t *);
1947 extern void     nfs4_init_stateid_types(nfs4_stateid_types_t *);
1948 extern void     nfs4_save_stateid(stateid4 *, nfs4_stateid_types_t *);
1949 
1950 extern kmutex_t nfs4_server_lst_lock;
1951 
1952 extern void     nfs4callback_destroy(nfs4_server_t *);
1953 extern void     nfs4_callback_init(void);
1954 extern void     nfs4_callback_fini(void);
1955 extern void     nfs4_cb_args(nfs4_server_t *, struct knetconfig *,
1956                         SETCLIENTID4args *);
1957 extern void     nfs4delegreturn_async(struct rnode4 *, int, bool_t);
1958 
1959 extern enum nfs4_delegreturn_policy nfs4_delegreturn_policy;
1960 
1961 extern void     nfs4_add_mi_to_server(nfs4_server_t *, mntinfo4_t *);
1962 extern void     nfs4_remove_mi_from_server(mntinfo4_t *, nfs4_server_t *);
1963 extern nfs4_server_t *nfs4_move_mi(mntinfo4_t *, servinfo4_t *, servinfo4_t *);
1964 extern bool_t   nfs4_fs_active(nfs4_server_t *);
1965 extern void     nfs4_server_rele(nfs4_server_t *);
1966 extern bool_t   inlease(nfs4_server_t *);
1967 extern bool_t   nfs4_has_pages(vnode_t *);
1968 extern void     nfs4_log_badowner(mntinfo4_t *, nfs_opnum4);
1969 
1970 #endif /* _KERNEL */
1971 
1972 /*
1973  * Client State Recovery
1974  */
1975 
1976 /*
1977  * The following defines are used for rs_flags in
1978  * a nfs4_recov_state_t structure.
1979  *
1980  * NFS4_RS_RENAME_HELD          Indicates that the mi_rename_lock was held.
1981  * NFS4_RS_GRACE_MSG            Set once we have uprintf'ed a grace message.
1982  * NFS4_RS_DELAY_MSG            Set once we have uprintf'ed a delay message.
1983  * NFS4_RS_RECALL_HELD1         r_deleg_recall_lock for vp1 was held.
1984  * NFS4_RS_RECALL_HELD2         r_deleg_recall_lock for vp2 was held.
1985  */
1986 #define NFS4_RS_RENAME_HELD     0x000000001
1987 #define NFS4_RS_GRACE_MSG       0x000000002
1988 #define NFS4_RS_DELAY_MSG       0x000000004
1989 #define NFS4_RS_RECALL_HELD1    0x000000008
1990 #define NFS4_RS_RECALL_HELD2    0x000000010
1991 
1992 /*
1993  * Information that is retrieved from nfs4_start_op() and that is
1994  * passed into nfs4_end_op().
1995  *
1996  * rs_sp is a reference to the nfs4_server that was found, or NULL.
1997  *
 * rs_num_retry_despite_err is the number of times the client retried an
 * OTW op despite a recovery error.  It is only incremented for hints
 * exempt from normal R4RECOVERR processing
2001  * (OH_CLOSE/OH_LOCKU/OH_DELEGRETURN).  (XXX this special-case code
2002  * needs review for possible removal.)
2003  * It is initialized wherever nfs4_recov_state_t is declared -- usually
2004  * very near initialization of rs_flags.
2005  */
2006 typedef struct {
2007         nfs4_server_t   *rs_sp;
2008         int             rs_flags;
2009         int             rs_num_retry_despite_err;
2010 } nfs4_recov_state_t;
2011 
2012 /*
2013  * Flags for nfs4_check_remap, nfs4_remap_file and nfs4_remap_root.
2014  */
2015 
2016 #define NFS4_REMAP_CKATTRS      1
2017 #define NFS4_REMAP_NEEDSOP      2
2018 
2019 #ifdef _KERNEL
2020 
2021 extern int      nfs4_is_otw_open_necessary(nfs4_open_owner_t *, int,
2022                         vnode_t *, int, int *, int, nfs4_recov_state_t *);
2023 extern void     nfs4setclientid(struct mntinfo4 *, struct cred *, bool_t,
2024                         nfs4_error_t *);
2025 extern void     nfs4_reopen(vnode_t *, nfs4_open_stream_t *, nfs4_error_t *,
2026                         open_claim_type4, bool_t, bool_t);
2027 extern void     nfs4_remap_root(struct mntinfo4 *, nfs4_error_t *, int);
2028 extern void     nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int,
2029                         nfs4_error_t *);
2030 extern void     nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int,
2031                         nfs4_error_t *);
2032 extern int      nfs4_make_dotdot(struct nfs4_sharedfh *, hrtime_t,
2033                         vnode_t *, cred_t *, vnode_t **, int);
2034 extern void     nfs4_fail_recov(vnode_t *, char *, int, nfsstat4);
2035 
2036 extern int      nfs4_needs_recovery(nfs4_error_t *, bool_t, vfs_t *);
2037 extern int      nfs4_recov_marks_dead(nfsstat4);
2038 extern bool_t   nfs4_start_recovery(nfs4_error_t *, struct mntinfo4 *,
2039                         vnode_t *, vnode_t *, stateid4 *,
2040                         nfs4_lost_rqst_t *, nfs_opnum4, nfs4_bseqid_entry_t *,
2041                         vnode_t *, char *);
2042 extern int      nfs4_start_op(struct mntinfo4 *, vnode_t *, vnode_t *,
2043                         nfs4_recov_state_t *);
2044 extern void     nfs4_end_op(struct mntinfo4 *, vnode_t *, vnode_t *,
2045                         nfs4_recov_state_t *, bool_t);
2046 extern int      nfs4_start_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
2047                         nfs4_op_hint_t, nfs4_recov_state_t *, bool_t *);
2048 extern void     nfs4_end_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
2049                                 nfs4_op_hint_t, nfs4_recov_state_t *, bool_t);
2050 extern char     *nfs4_recov_action_to_str(nfs4_recov_t);
2051 
2052 /*
2053  * In sequence, code desiring to unmount an ephemeral tree must
2054  * call nfs4_ephemeral_umount, nfs4_ephemeral_umount_activate,
2055  * and nfs4_ephemeral_umount_unlock. The _unlock must also be
2056  * called on all error paths that occur before it would naturally
2057  * be invoked.
2058  *
 * The caller must also provide a pointer to a boolean to keep track
 * of whether or not the code in _unlock is to be run.
2061  */
2062 extern void     nfs4_ephemeral_umount_activate(mntinfo4_t *,
2063     bool_t *, nfs4_ephemeral_tree_t **);
2064 extern int      nfs4_ephemeral_umount(mntinfo4_t *, int, cred_t *,
2065     bool_t *, nfs4_ephemeral_tree_t **);
2066 extern void     nfs4_ephemeral_umount_unlock(bool_t *,
2067     nfs4_ephemeral_tree_t **);
2068 
2069 extern int      nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp);
2070 
2071 extern int      nfs4_callmapid(utf8string *, struct nfs_fsl_info *);
2072 extern int      nfs4_fetch_locations(mntinfo4_t *, struct nfs4_sharedfh *,
2073     char *, cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, bool_t);
2074 
/* Delegation recall handling: wait for / end a recall in progress. */
extern int      wait_for_recall(vnode_t *, vnode_t *, nfs4_op_hint_t,
                        nfs4_recov_state_t *);
extern void     nfs4_end_op_recall(vnode_t *, vnode_t *, nfs4_recov_state_t *);
/* Send a SIGLOST-style notification to the given pid. */
extern void     nfs4_send_siglost(pid_t, mntinfo4_t *mi, vnode_t *vp, bool_t,
                    int, nfsstat4);
/* Delay interval applied after NFS4 delay-type errors (see users). */
extern time_t   nfs4err_delay_time;
/* Grace-period / delay wait bookkeeping used during server recovery. */
extern void     nfs4_set_grace_wait(mntinfo4_t *);
extern void     nfs4_set_delay_wait(vnode_t *);
extern int      nfs4_wait_for_grace(mntinfo4_t *, nfs4_recov_state_t *);
extern int      nfs4_wait_for_delay(vnode_t *, nfs4_recov_state_t *);
/* Construct a bad-seqid entry for later recovery processing. */
extern nfs4_bseqid_entry_t *nfs4_create_bseqid_entry(nfs4_open_owner_t *,
                    nfs4_lock_owner_t *, vnode_t *, pid_t, nfs4_tag_type_t,
                    seqid4);

/* Resend lost OPEN / DELEGRETURN requests over the wire. */
extern void     nfs4_resend_open_otw(vnode_t **, nfs4_lost_rqst_t *,
                        nfs4_error_t *);
extern void     nfs4_resend_delegreturn(nfs4_lost_rqst_t *, nfs4_error_t *,
                        nfs4_server_t *);
/* Error classification: is this RPC error retryable / failover-worthy? */
extern int      nfs4_rpc_retry_error(int);
extern int      nfs4_try_failover(nfs4_error_t *);
extern void     nfs4_free_msg(nfs4_debug_msg_t *);
/* kstat helpers for per-mount recovery statistics. */
extern void     nfs4_mnt_recov_kstat_init(vfs_t *);
extern void     nfs4_mi_kstat_inc_delay(mntinfo4_t *);
extern void     nfs4_mi_kstat_inc_no_grace(mntinfo4_t *);
/* Human-readable names for NFSv4 status codes and operation numbers. */
extern char     *nfs4_stat_to_str(nfsstat4);
extern char     *nfs4_op_to_str(nfs_opnum4);

/*
 * Queue a debug event/fact message.  Both are cold paths, as the
 * rarely_called pragmas below tell the compiler.
 */
extern void     nfs4_queue_event(nfs4_event_type_t, mntinfo4_t *, char *,
                    uint_t, vnode_t *, vnode_t *, nfsstat4, char *, pid_t,
                    nfs4_tag_type_t, nfs4_tag_type_t, seqid4, seqid4);
extern void     nfs4_queue_fact(nfs4_fact_type_t, mntinfo4_t *, nfsstat4,
                    nfs4_recov_t, nfs_opnum4, bool_t, char *, int, vnode_t *);
#pragma rarely_called(nfs4_queue_event)
#pragma rarely_called(nfs4_queue_fact)

/* Used for preformed "." and ".." dirents */
extern char     *nfs4_dot_entries;
extern char     *nfs4_dot_dot_entry;

#ifdef  DEBUG
/* Thread-specific-data key used only by DEBUG kernels. */
extern uint_t   nfs4_tsd_key;
#endif
2117 
2118 #endif /* _KERNEL */
2119 
2120 /*
2121  * Filehandle management.
2122  *
2123  * Filehandles can change in v4, so rather than storing the filehandle
2124  * directly in the rnode, etc., we manage the filehandle through one of
2125  * these objects.
 * Locking: sfh_fh and sfh_tree are protected by the filesystem's
2127  * mi_fh_lock.  The reference count and flags are protected by sfh_lock.
2128  * sfh_mi is read-only.
2129  *
2130  * mntinfo4_t::mi_fh_lock > sfh_lock.
2131  */
2132 
/*
 * Shared filehandle object.  Kept in a per-filesystem AVL tree keyed
 * by sfh_fh; see the locking rules in the comment above.
 */
typedef struct nfs4_sharedfh {
        nfs_fh4 sfh_fh;                 /* key and current filehandle */
        kmutex_t sfh_lock;              /* protects sfh_refcnt and sfh_flags */
        uint_t sfh_refcnt;              /* reference count */
        uint_t sfh_flags;               /* SFH4_* flags below */
        mntinfo4_t *sfh_mi;             /* backptr to filesystem; read-only */
        avl_node_t sfh_tree;            /* used by avl package */
} nfs4_sharedfh_t;

/* Shared filehandles are interned, so identity compares by pointer. */
#define SFH4_SAME(sfh1, sfh2)   ((sfh1) == (sfh2))

/*
 * Flags.
 */
#define SFH4_IN_TREE    0x1             /* currently in an AVL tree */
2148 
2149 #ifdef _KERNEL
2150 
/* Initialize the AVL tree that holds a filesystem's shared filehandles. */
extern void sfh4_createtab(avl_tree_t *);
/*
 * Obtain a shared filehandle for the given fh.  Exact get-vs-put
 * semantics (lookup vs. insert) are defined by the implementation.
 */
extern nfs4_sharedfh_t *sfh4_get(const nfs_fh4 *, mntinfo4_t *);
extern nfs4_sharedfh_t *sfh4_put(const nfs_fh4 *, mntinfo4_t *,
                                nfs4_sharedfh_t *);
/* Replace the current filehandle value (filehandles can change in v4). */
extern void sfh4_update(nfs4_sharedfh_t *, const nfs_fh4 *);
/* Copy the filehandle value out into a plain nfs4_fhandle_t. */
extern void sfh4_copyval(const nfs4_sharedfh_t *, nfs4_fhandle_t *);
/* Reference counting; sfh4_rele NULLs the caller's pointer. */
extern void sfh4_hold(nfs4_sharedfh_t *);
extern void sfh4_rele(nfs4_sharedfh_t **);
/* Debug aid: print the filehandle. */
extern void sfh4_printfhandle(const nfs4_sharedfh_t *);
2160 
2161 #endif
2162 
2163 /*
2164  * Path and file name management.
2165  *
2166  * This type stores the name of an entry in the filesystem and keeps enough
2167  * information that it can provide a complete path.  All fields are
2168  * protected by fn_lock, except for the reference count, which is managed
2169  * using atomic add/subtract.
2170  *
 * Additionally, the shared filehandle for this fname is stored.
 * Normally, fn_get(), when it creates this fname, stores the passed-in
 * shared fh in fn_sfh by doing sfh4_hold. Similarly, the path which
 * destroys this fname releases the reference on this fh by doing sfh4_rele.
 *
 * fn_get uses the fn_sfh to refine the comparison in cases
2177  * where we have matched the name but have differing file handles,
2178  * this normally happens due to
2179  *
2180  *      1. Server side rename of a file/directory.
2181  *      2. Another client renaming a file/directory on the server.
2182  *
2183  * Differing names but same filehandle is possible as in the case of hardlinks,
2184  * but differing filehandles with same name component will later confuse
2185  * the client and can cause various panics.
2186  *
2187  * Lock order: child and then parent.
2188  */
2189 
typedef struct nfs4_fname {
        struct nfs4_fname *fn_parent;   /* parent name; null if fs root */
        char *fn_name;                  /* the actual name */
        ssize_t fn_len;                 /* strlen(fn_name) */
        uint32_t fn_refcnt;             /* reference count (atomic add/sub) */
        kmutex_t fn_lock;               /* protects all fields but fn_refcnt */
        avl_node_t fn_tree;             /* AVL node linkage */
        avl_tree_t fn_children;         /* children, if any */
        nfs4_sharedfh_t *fn_sfh;        /* The fh for this fname */
} nfs4_fname_t;
2200 
2201 #ifdef _KERNEL
2202 
/*
 * Sentinel vnode returned (via NFS4_XATTR_DIR_NOTSUPP) where an
 * extended-attribute directory is not supported.
 */
extern vnode_t  nfs4_xattr_notsupp_vnode;
#define NFS4_XATTR_DIR_NOTSUPP  &nfs4_xattr_notsupp_vnode

/* fname lifecycle and path construction; see structure comment above. */
extern nfs4_fname_t *fn_get(nfs4_fname_t *, char *, nfs4_sharedfh_t *);
extern void fn_hold(nfs4_fname_t *);
extern void fn_rele(nfs4_fname_t **);
extern char *fn_name(nfs4_fname_t *);
extern char *fn_path(nfs4_fname_t *);
/* Re-parent/rename an fname (e.g. after a rename operation). */
extern void fn_move(nfs4_fname_t *, nfs4_fname_t *, char *);
extern nfs4_fname_t *fn_parent(nfs4_fname_t *);

/* Referral Support */
extern int nfs4_process_referral(mntinfo4_t *, nfs4_sharedfh_t *, char *,
    cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, struct nfs_fsl_info *);
2217 
2218 #endif
2219 
2220 /*
2221  * Per-zone data for managing client handles, included in this file for the
2222  * benefit of MDB.
2223  */
struct nfs4_clnt {
        struct chhead   *nfscl_chtable4;        /* client handle table */
        kmutex_t        nfscl_chtable4_lock;    /* protects nfscl_chtable4 */
        zoneid_t        nfscl_zoneid;           /* owning zone */
        list_node_t     nfscl_node;             /* list linkage */
        struct clstat4  nfscl_stat;             /* per-zone client statistics */
};
2231 
2232 #ifdef  __cplusplus
2233 }
2234 #endif
2235 
2236 #endif /* _NFS4_CLNT_H */