1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2018 Nexenta Systems, Inc.
  29  * Copyright 2019 Nexenta by DDN, Inc.
  30  */
  31 
  32 #ifndef _NFS4_H
  33 #define _NFS4_H
  34 
  35 #include <sys/types.h>
  36 #include <sys/vnode.h>
  37 #include <sys/fem.h>
  38 #include <rpc/rpc.h>
  39 #include <nfs/nfs.h>
  40 
  41 #ifdef _KERNEL
  42 #include <nfs/nfs4_kprot.h>
  43 #include <nfs/nfs4_drc.h>
  44 #include <sys/nvpair.h>
  45 #else
  46 #include <rpcsvc/nfs4_prot.h>
  47 #endif
  48 #include <nfs/nfs4_attr.h>
  49 #include <sys/acl.h>
  50 #include <sys/list.h>
  51 
  52 #ifdef  __cplusplus
  53 extern "C" {
  54 #endif
  55 
/*
 * Size limits for variable-length NFSv4 items; every limit is 64K (65536).
 * NOTE(review): the consumers of these limits are not in this header --
 * presumably the XDR decode routines; confirm before changing a value.
 */
#define NFS4_MAX_SECOID4        65536
#define NFS4_MAX_UTF8STRING     65536
#define NFS4_MAX_LINKTEXT4      65536
#define NFS4_MAX_PATHNAME4      65536
  60 
/*
 * Bundle of network identity information: a netname string, a netbuf
 * address and a knetconfig, each accompanied by a length field.
 * NOTE(review): which length goes with which pointer is inferred from the
 * member names only (netbuf_len/addr, netnm_len/netname,
 * knconf_len/knconf) -- confirm against the code that fills this in.
 */
struct nfs_fsl_info {
        uint_t netbuf_len;
        uint_t netnm_len;
        uint_t knconf_len;
        char *netname;
        struct netbuf *addr;
        struct knetconfig *knconf;
};
  69 
  70 #ifdef _KERNEL
  71 
  72 typedef struct nfs4_fhandle {
  73         int fh_len;
  74         char fh_buf[NFS4_FHSIZE];
  75 } nfs4_fhandle_t;
  76 
/*
 * Minor version constants; both are 0.
 * NOTE(review): presumably the minorversion values used on the forward
 * (NFS4_) and callback (CB4_) paths -- confirm against the users.
 */
#define NFS4_MINORVERSION 0
#define CB4_MINORVERSION 0
  79 
  80 /*
  81  * Set the fattr4_change variable using a time struct. Note that change
  82  * is 64 bits, but timestruc_t is 128 bits in a 64-bit kernel.
  83  */
  84 #define NFS4_SET_FATTR4_CHANGE(change, ts)                      \
  85 {                                                       \
  86         change = (ts).tv_sec;                           \
  87         change <<= 32;                                    \
  88         change |= (uint32_t)((ts).tv_nsec);             \
  89 }
  90 
/*
 * Server lease period.  Value is in seconds;  Also used for grace period.
 * Only declared here; the definition lives outside this header.
 */
extern time_t rfs4_lease_time;
  95 
  96 /*
  97  * This set of typedefs and interfaces represent the core or base set
  98  * of functionality that backs the NFSv4 server's state related data
  99  * structures.  Since the NFSv4 server needs inter-RPC state to be
 100  * available that is unrelated to the filesystem (in other words,
 101  * soft-state), this functionality is needed to maintain that and is
 102  * written to be somewhat flexible to adapt to the various types of
 103  * data structures contained within the server.
 104  *
 105  * The basic structure at this level is that the server maintains a
 106  * global "database" which consists of a set of tables.  Each table
 107  * contains a set of like data structures.  Each table is indexed by
 108  * at least one hash function and in most cases two hashes.  Each
 * table's characteristics are set when it is created at run-time via
 110  * rfs4_table_create().  All table creation and related functions are
 111  * located in nfs4_state.c.  The generic database functionality is
 112  * located in nfs4_db.c.
 113  */
 114 
/*
 * Opaque handles for the state database.  Only the struct tags are declared
 * in this header, so users can hold pointers to these types but cannot look
 * inside them.
 */
typedef struct rfs4_dbe rfs4_dbe_t;             /* basic opaque db entry */
typedef struct rfs4_table rfs4_table_t;         /* basic table type */
typedef struct rfs4_index rfs4_index_t;         /* index */
typedef struct rfs4_database rfs4_database_t;   /* and database */
 119 
/*
 * Generic entry handle: a pointer to an anonymous struct whose only member
 * is the rfs4_dbe_t pointer.  Each concrete state struct declared later in
 * this header (rfs4_client_t, rfs4_openowner_t, ...) likewise begins with
 * an rfs4_dbe_t pointer.
 * NOTE(review): presumably that shared first member is what allows those
 * structs to be handled as rfs4_entry_t -- confirm in nfs4_db.c.
 */
typedef struct {                /* opaque entry type for later use */
        rfs4_dbe_t *dbe;
} *rfs4_entry_t;
 123 
/*
 * NFSv4 server state databases
 *
 * Initialized when the module is loaded and used by NFSv4 state tables.
 * These kmem_cache free pools are used globally, the NFSv4 state tables
 * which make use of these kmem_cache free pools are per zone.
 *
 * One cache is declared per kind of state entry; the definitions live
 * outside this header.
 */
extern kmem_cache_t *rfs4_client_mem_cache;
extern kmem_cache_t *rfs4_clntIP_mem_cache;
extern kmem_cache_t *rfs4_openown_mem_cache;
extern kmem_cache_t *rfs4_openstID_mem_cache;
extern kmem_cache_t *rfs4_lockstID_mem_cache;
extern kmem_cache_t *rfs4_lockown_mem_cache;
extern kmem_cache_t *rfs4_file_mem_cache;
extern kmem_cache_t *rfs4_delegstID_mem_cache;
 139 
 140 /* database, table, index creation entry points */
 141 extern rfs4_database_t *rfs4_database_create(uint32_t);
 142 extern void             rfs4_database_shutdown(rfs4_database_t *);
 143 extern void             rfs4_database_destroy(rfs4_database_t *);
 144 
 145 extern void             rfs4_database_destroy(rfs4_database_t *);
 146 
 147 extern kmem_cache_t     *nfs4_init_mem_cache(char *, uint32_t, uint32_t,
 148                                 uint32_t);
 149 extern rfs4_table_t     *rfs4_table_create(rfs4_database_t *, char *,
 150                                 time_t, uint32_t,
 151                                 bool_t (*create)(rfs4_entry_t, void *),
 152                                 void (*destroy)(rfs4_entry_t),
 153                                 bool_t (*expiry)(rfs4_entry_t),
 154                                 uint32_t, uint32_t, uint32_t, id_t);
 155 extern void             rfs4_table_destroy(rfs4_database_t *, rfs4_table_t *);
 156 extern rfs4_index_t     *rfs4_index_create(rfs4_table_t *, char *,
 157                                 uint32_t (*hash)(void *),
 158                                 bool_t (compare)(rfs4_entry_t, void *),
 159                                 void *(*mkkey)(rfs4_entry_t), bool_t);
 160 extern void             rfs4_index_destroy(rfs4_index_t *);
 161 
 162 /* Type used to direct rfs4_dbsearch() in what types of records to inspect */
 163 typedef enum {RFS4_DBS_VALID, RFS4_DBS_INVALID} rfs4_dbsearch_type_t;
/*
 * search and db entry manipulation entry points
 *
 * Declarations only.  NOTE(review): the implementations are presumably in
 * nfs4_db.c (the overview comment earlier in this file places the generic
 * database code there); locking and reference-count rules for each routine
 * must be taken from those definitions, not from the names.
 */
extern rfs4_entry_t     rfs4_dbsearch(rfs4_index_t *, void *,
                                bool_t *, void *, rfs4_dbsearch_type_t);
extern void             rfs4_dbe_lock(rfs4_dbe_t *);
extern void             rfs4_dbe_unlock(rfs4_dbe_t *);
extern clock_t          rfs4_dbe_twait(rfs4_dbe_t *, clock_t);
extern void             rfs4_dbe_cv_broadcast(rfs4_dbe_t *);
extern void             rfs4_dbe_hold(rfs4_dbe_t *);
extern void             rfs4_dbe_hold_nolock(rfs4_dbe_t *);
extern void             rfs4_dbe_rele_nolock(rfs4_dbe_t *);
extern void             rfs4_dbe_rele(rfs4_dbe_t *);
extern uint32_t rfs4_dbe_refcnt(rfs4_dbe_t *);
extern id_t             rfs4_dbe_getid(rfs4_dbe_t *);
extern void             rfs4_dbe_invalidate(rfs4_dbe_t *);
extern bool_t           rfs4_dbe_is_invalid(rfs4_dbe_t *);
extern time_t           rfs4_dbe_get_timerele(rfs4_dbe_t *);
extern void             rfs4_dbe_hide(rfs4_dbe_t *);
extern void             rfs4_dbe_unhide(rfs4_dbe_t *);
/* rfs4_dbe_islocked() is only declared in DEBUG builds */
#ifdef DEBUG
extern bool_t           rfs4_dbe_islocked(rfs4_dbe_t *);
#endif
/* Table walker: takes a per-entry callout plus an opaque argument for it */
extern void             rfs4_dbe_walk(rfs4_table_t *,
                        void (*callout)(rfs4_entry_t, void *), void *);
 187 
 188 /*
 189  * Minimal server stable storage.
 190  *
 191  * Currently the NFSv4 server will only save the client
 192  * ID (the long version) so that it will be able to
 193  * grant possible reclaim requests during the infamous
 194  * grace_period.
 195  */
 196 
 197 #define RFS4_SS_DIRSIZE 64 * 1024
 198 #define NFS4_SS_VERSION 1
 199 
 200 /* handy pathname structure */
 201 typedef struct ss_pn {
 202         char *leaf;
 203         char pn[MAXPATHLEN];
 204 } rfs4_ss_pn_t;
 205 
 206 /*
 207  * The server will build this link list on startup. It represents the
 208  * clients that have had valid state on the server in a prior instance.
 209  *
 210  */
 211 typedef struct rfs4_oldstate {
 212         struct rfs4_oldstate    *next;
 213         struct rfs4_oldstate    *prev;
 214         rfs4_ss_pn_t            *ss_pn;
 215         nfs_client_id4          cl_id4;
 216 } rfs4_oldstate_t;
 217 
/*
 * This union is used to overlay the server's internal treatment of
 * the protocol's stateid4 datatype.  Therefore, "bits" must not exceed
 * the size of stateid4 and more importantly should match the size of
 * stateid4.  The chgseq field must be the first entry since it overlays
 * stateid4.seqid.
 *
 * The type, clnodeid and ident bit-fields together fill exactly one
 * 32-bit word (2 + 8 + 22 bits).
 */
typedef union {
        stateid4 stateid;
        struct {
                uint32_t chgseq;        /* State changes / protocol's seqid */
                uint32_t boottime;      /* boot time  */
                uint32_t type:2;        /* stateid_type_t as define below */
                uint32_t clnodeid:8;    /* cluster server nodeid */
                uint32_t ident:22;      /* 2^22-1 openowner x fhs */
                pid_t    pid;           /* pid of corresponding lock owner */
        } bits;
} stateid_t;
 236 /*
 237  * Note that the way the type field above is defined, this enum must
 238  * not have more than 4 members.
 239  */
 240 typedef enum {OPENID, LOCKID, DELEGID} stateid_type_t;
 241 
 242 
/*
 * Set of RPC credentials used for a particular operation.
 * Used for operations like SETCLIENTID_CONFIRM where the
 * credentials needs to match those used at SETCLIENTID.
 *
 * Currently just an untyped pointer.
 */
typedef void *cred_set_t;               /* For now XXX */
 249 
 250 /*
 251  * "wait" struct for use in the open open and lock owner state
 252  * structures to provide serialization between server threads that are
 253  * handling requests for the same open owner or lock stateid.  This
 254  * way only one thread will be updating things like sequence ids,
 255  * replay cache and stateid at a time.
 256  */
 257 typedef struct rfs4_state_wait {
 258         uint32_t                sw_active;
 259         uint32_t                sw_wait_count;
 260         kmutex_t                sw_cv_lock[1];
 261         kcondvar_t              sw_cv[1];
 262 } rfs4_state_wait_t;
 263 
 264 extern void     rfs4_sw_enter(rfs4_state_wait_t *);
 265 extern void     rfs4_sw_exit(rfs4_state_wait_t *);
 266 
 267 /*
 268  * This enum and the following rfs4_cbinfo_t struct are used to
 269  * maintain information about the callback path used from the server
 270  * to client for operations like CB_GETATTR and CB_RECALL.  The
 271  * rfs4_cbinfo_t struct is meant to be encompassed in the client
 272  * struct and managed within that structure's locking scheme.
 273  *
 274  * The various states of the callback path are used by the server to
 275  * determine if delegations should initially be provided to a client
 276  * and then later on if connectivity has been lost and delegations
 277  * should be revoked.
 278  */
 279 
 280 /*
 281  * CB_NOCHANGE - Special value used for interfaces within the delegation
 282  *              code to signify that "no change" has occurred to the
 283  *              callback path
 284  * CB_UNINIT    - No callback info provided by the client
 285  * CB_NONE      - Callback info provided but CB_NULL call
 286  *                has yet to be attempted
 287  * CB_OK        - Callback path tested with CB_NULL with success
 288  * CB_INPROG    - Callback path currently being tested with CB_NULL
 289  * CB_FAILED    - Callback path was == CB_OK but has failed
 290  *                with timeout/rpc error
 291  * CB_BAD       - Callback info provided but CB_NULL failed
 292  */
 293 typedef enum {
 294         CB_NOCHANGE = 0,
 295         CB_UNINIT = 1,
 296         CB_NONE = 2,
 297         CB_OK = 3,
 298         CB_INPROG = 4,
 299         CB_FAILED = 5,
 300         CB_BAD = 6
 301 } rfs4_cbstate_t;
 302 
 303 #define RFS4_CBCH_MAX   10      /* size callback client handle cache */
 304 /*
 305  * Callback info for a client.
 306  * Client only provides: cb_client4 and cb_ident
 307  * The rest of the information is used to track callback path status
 308  * and usage.
 309  *
 310  * cb_state - used as comments for the rfs4_cbstate_t enum indicate
 311  * cb_notified_of_cb_path_down - if the callback path was once CB_OK and
 312  *      has hence CB_FAILED, the client needs to be notified via RENEW.
 313  * cb_timefailed - current time when cb_state transitioned from
 314  *      CB_OK -> CB_FAILED.  Meant for observability.  When did that happen?
 315  * cb_chc_free/cb_chc - cache of client handles for the callback path
 316  * cb_ident - SETCLIENTID provided callback_ident value
 317  * callback - SETCLIENTID provided cb_client4 value
 318  * cb_refcnt - current number of users of this structure's content
 319  *      protected by cb_lock
 320  * cb_badbehavior - how many times did a client do something we didn't like?
 321  * cb_lock - lock for contents of cbinfo
 322  * cb_cv - used to allow threads to wait on CB_NULL completion
 323  * cb_nullcaller - is there a thread currently taking care of
 324  *      new callback information?
 325  * cb_cv_nullcaller - used by the thread doing CB_NULL to wait on
 326  *      threads that may be using client handles of the current
 327  *      client handle cache.
 328  * newer - new callback info provided by a client and awaiting
 329  *      CB_NULL testing and move to regular cbinfo.
 330  */
 331 typedef struct {
 332         rfs4_cbstate_t  cb_state;
 333         unsigned        cb_notified_of_cb_path_down:1;
 334         time_t          cb_timefailed;
 335         int             cb_chc_free;
 336         CLIENT          *cb_chc[RFS4_CBCH_MAX];
 337         uint32_t        cb_ident;
 338         cb_client4      cb_callback;
 339         uint32_t        cb_refcnt;
 340         uint32_t        cb_badbehavior;
 341         kmutex_t        cb_lock[1];
 342         kcondvar_t      cb_cv[1];
 343         bool_t          cb_nullcaller;
 344         kcondvar_t      cb_cv_nullcaller[1];
 345         struct {
 346                 bool_t          cb_new;
 347                 bool_t          cb_confirmed;
 348                 uint32_t        cb_ident;
 349                 cb_client4      cb_callback;
 350         } cb_newer;
 351 } rfs4_cbinfo_t;
 352 
 353 /*
 354  * A server instance. We can associate sets of clients - via a pointer in
 355  * rfs4_client_t - with a given server instance, allowing us to treat clients
 356  * in the set differently to clients in other sets.
 357  *
 358  * Currently used only for Sun Cluster HA-NFS support, to group clients
 359  * on NFS resource failover so each set of clients gets its own dedicated
 360  * grace period and distributed stable storage data.
 361  */
 362 typedef struct rfs4_servinst {
 363         int                     dss_npaths;
 364         krwlock_t               rwlock;
 365         krwlock_t               oldstate_lock;
 366         time_t                  start_time;
 367         time_t                  grace_period;
 368         rfs4_oldstate_t         *oldstate;
 369         struct rfs4_dss_path    **dss_paths;
 370         struct rfs4_servinst    *next;
 371         struct rfs4_servinst    *prev;
 372 } rfs4_servinst_t;
 373 
 374 /*
 375  * DSS: distributed stable storage
 376  */
 377 
 378 typedef struct rfs4_dss_path {
 379         struct rfs4_dss_path    *next; /* for insque/remque */
 380         struct rfs4_dss_path    *prev; /* for insque/remque */
 381         char                    *path;
 382         struct rfs4_servinst    *sip;
 383         unsigned                index; /* offset in servinst's array */
 384 } rfs4_dss_path_t;
 385 
/*
 * NOTE(review): the four variables below are declared without `extern', so
 * every translation unit that includes this header gets its own tentative
 * definition.  That only links when the toolchain merges common symbols;
 * consider `extern' here plus a single definition in one .c file.  Not
 * changed here because the defining .c file is not visible.
 */

/* array of paths passed-in from nfsd command-line; stored in nvlist */
char            **rfs4_dss_newpaths;
uint_t          rfs4_dss_numnewpaths;

/* nvlists of all DSS paths: current, and before last warmstart */
nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
 392 
 393 /*
 394  * The server maintains a set of state on a per client basis that
 395  * matches that of the protocol requirements.  A client's state is
 396  * rooted with the rfs4_client_t struct of which there is one per
 397  * client and is created when SETCLIENTID/SETCLIENTID_CONFIRM are
 398  * received.  From there, the server then creates rfs4_openowner_t
 399  * structs for each new open owner from that client and are initiated
 400  * at OPEN/OPEN_CONFIRM (when the open owner is new to the server).
 401  * At OPEN, at least two other structures are created, and potentially a
 402  * third.  rfs4_state_t is created to track the association between an
 403  * open owner and a particular file. An rfs4_file_t struct may be
 404  * created (if the file is not already open) at OPEN as well.  The
 405  * rfs4_file_t struct is the only one that is per server and not per
 406  * client.  The rfs4_deleg_state_t struct is created in the
 407  * instance that the server is going to provide a delegation for the
 408  * file being OPENed.  Finally, the rfs4_lockowner_t is created at the
 409  * first use of a lock owner at the server and is a result of the LOCK
 410  * operation.  The rfs4_lo_state_t struct is then created to represent
 411  * the relation between the lock owner and the file.
 412  *
 413  */
 414 /*
 415  * The following ascii art represents each of these data structs and
 416  * their references to each other.  Note: "<-(x)->" represents the
 * doubly linked lists (list_t).
 418  *
 419  *                          ____________________
 420  *                         |                    |
 421  *                         |    rfs4_client_t   |
 422  *                       ->|         (1)        |<-
 423  *                      /  |____________________|  \
 424  *                     /              ^             \
 425  *                    /               |              \
 426  *  ____________________    ____________________    ____________________
 427  * |                    |  |                    |  |                    |
 428  * |  rfs4_lockowner_t  |  |  rfs4_openowner_t  |  | rfs4_deleg_state_t |
 429  * |                    |  |     (3)    <-(1)-> |  |            <-(2)-> |
 430  * |____________________|  |____________________|  |____________________|
 431  *           ^                        ^                       |
 432  *           |                        |                       V
 433  *  ____________________    ____________________    ____________________
 434  * |                    |  |                    |  |                    |
 435  * |  rfs4_lo_state_t   |->|    rfs4_state_t    |->|     rfs4_file_t    |
 436  * |            <-(4)-> |  |     (4)    <-(3)-> |  |        (2)         |
 437  * |____________________|  |____________________|  |____________________|
 438  */
 439 /*
 * Each of these data types is kept in a separate rfs4_table_t and is
 * actually encapsulated within a rfs4_dbe_t struct.  The various
 * tables and their construction are done in nfs4_state.c but
 * documented here for completeness.
 444  *
 445  * Table                Data struct stored      Indexed by
 446  * -----                ------------------      ----------
 447  * rfs4_client_tab      rfs4_client_t           nfs_client_id4
 448  *                                              clientid4
 449  *
 450  * rfs4_openowner_tab   rfs4_openowner_t        open_owner4
 451  *
 452  * rfs4_state_tab       rfs4_state_t            open_owner4 | file
 453  *                                              stateid
 454  *
 455  * rfs4_lo_state_tab    rfs4_lo_state_t         lockowner | stateid
 456  *                                              lock_stateid
 457  *
 458  * rfs4_lockowner_tab   rfs4_lockowner_t        lockowner
 459  *                                              pid
 460  *
 461  * rfs4_file_tab        rfs4_file_t             filehandle
 462  *
 463  * rfs4_deleg_state_tab rfs4_deleg_state_t      clientid4 | file
 464  *                                              deleg_stateid
 465  */
 466 
 467 /*
 468  * The client struct, it is the root of all state for a particular
 469  * client.  The client is identified by the nfs_client_id4 via
 470  * SETCLIENTID and the server returns the clientid4 as short hand reference
 471  */
 472 /*
 473  * Client struct - as mentioned above it is the root of all state for
 474  * a single client as identified by the client supplied nfs_client_id4
 475  *
 476  * dbe - encapsulation struct
 477  * clientid - server assigned short hand reference to client
 478  * nfs_client - client supplied identifier for itself
 479  * confirm_verf - the value provided to the client for SETCLIENTID_CONFIRM
 480  * need_confirm - does this client need to be SETCLIENTID_CONFIRMed?
 481  *
 482  * unlksys_completed - has an F_UNLKSYS been done for this client which
 483  *              says that the use of cleanlocks() on individual files
 484  *              is not required?
 485  * can_reclaim - indicates if client is allowed to reclaim after server
 486  *              start-up (client had previous state at server)
 487  * ss_remove - indicates that the rfs4_client_destroy function should
 488  *              clean up stable storage file.
 489  * forced_expire - set if the sysadmin has used clear_locks for this client.
 490  * no_referrals - set if the client is Solaris and pre-dates referrals
 491  * deleg_revoked - how many delegations have been revoked for this client?
 492  *
 493  * cp_confirmed - this refers to a confirmed client struct that has
 494  * the same nfs_client_id4 as this client struct.  When/if this client
 495  * struct is confirmed via SETCLINETID_CONFIRM, the previously
 496  * confirmed client struct will be "closed" and hence this reference.
 497  *
 498  * last_access - used to determine if the client has let its lease expire
 499  * cbinfo - struct containing all callback related information
 500  * cr_set - credentials used for the SETCLIENTID/SETCLIENTID_CONFIRM pair
 501  * sysid - the lock manager sysid allocated for this client's file locks
 502  * openownerlist - root of openowners list associated with this client
 503  * ss_pn - Pathname to the stable storage file.
 504  * cl_addr - Clients network address.
 505  * server_instance - pointer to the currently associated server instance
 506  */
 507 typedef struct rfs4_client {
 508         rfs4_dbe_t              *rc_dbe;
 509         clientid4               rc_clientid;
 510         nfs_client_id4          rc_nfs_client;
 511         verifier4               rc_confirm_verf;
 512         unsigned                rc_need_confirm:1;
 513         unsigned                rc_unlksys_completed:1;
 514         unsigned                rc_can_reclaim:1;
 515         unsigned                rc_ss_remove:1;
 516         unsigned                rc_forced_expire:1;
 517         uint_t                  rc_deleg_revoked;
 518         struct rfs4_client      *rc_cp_confirmed;
 519         time_t                  rc_last_access;
 520         rfs4_cbinfo_t           rc_cbinfo;
 521         cred_set_t              rc_cr_set;
 522         sysid_t                 rc_sysidt;
 523         list_t                  rc_openownerlist;
 524         rfs4_ss_pn_t            *rc_ss_pn;
 525         struct sockaddr_storage rc_addr;
 526         rfs4_servinst_t         *rc_server_instance;
 527 } rfs4_client_t;
 528 
 529 /*
 530  * ClntIP struct - holds the diagnosis about whether the client
 531  * cannot support referrals.  Set to true for old Solaris clients.
 532  */
 533 
 534 typedef struct rfs4_clntip {
 535         rfs4_dbe_t              *ri_dbe;
 536         struct sockaddr_storage ri_addr;
 537         unsigned                ri_no_referrals:1;
 538 } rfs4_clntip_t;
 539 
 540 /*
 541  * The openowner contains the client supplied open_owner4 as well as
 542  * the matching sequence id and is used to track the client's usage of
 543  * the open_owner4.  Note that a reply is saved here as well for
 544  * processing of retransmissions.
 545  *
 546  * dbe - encapsulation struct
 547  * client - reference to rfs4_client_t for this openowner
 548  * owner - actual client supplied open_owner4
 549  * need_confirm - does this openowner need to be OPEN_CONFIRMed
 550  * postpone_confirm - set if error received on first use of open_owner
 551  * state2confirm - what stateid4 should be used on the OPEN_CONFIRM
 552  * open_seqid - what is the next open_seqid expected for this openowner
 553  * oo_sw - used to serialize access to the open seqid/reply handling
 554  * cr_set - credential used for the OPEN
 555  * statelist - root of state struct list associated with this openowner
 556  * node - node for client struct list of openowners
 557  * reply_fh - open replay processing needs the filehandle so that it is
 558  *      able to reset the current filehandle for appropriate compound
 559  *      processing and reply.
 560  * reply - last reply sent in relation to this openowner
 561  */
 562 typedef struct rfs4_openowner {
 563         rfs4_dbe_t              *ro_dbe;
 564         rfs4_client_t           *ro_client;
 565         open_owner4             ro_owner;
 566         unsigned                ro_need_confirm:1;
 567         unsigned                ro_postpone_confirm:1;
 568         seqid4                  ro_open_seqid;
 569         rfs4_state_wait_t       ro_sw;
 570         cred_set_t              ro_cr_set;
 571         list_t                  ro_statelist;
 572         list_node_t             ro_node;
 573         nfs_fh4                 ro_reply_fh;
 574         nfs_resop4              ro_reply;
 575 } rfs4_openowner_t;
 576 
 577 /*
 578  * This state struct represents the association between an openowner
 579  * and a file that has been OPENed by that openowner.
 580  *
 581  * dbe - encapsulation struct
 582  * stateid - server provided stateid
 583  * owner - reference back to the openowner for this state
 584  * finfo - reference to the open file for this state
 585  * open_access - how did the openowner OPEN the file (access)
 586  * open_deny - how did the openowner OPEN the file (deny)
 587  * share_access - what share reservation is on the file (access)
 588  * share_deny - what share reservation is on the file (deny)
 589  * closed - has this file been closed?
 590  * lostatelist - root of list of lo_state associated with this state/file
 591  * node - node for state struct list of states
 592  */
 593 typedef struct rfs4_state {
 594         rfs4_dbe_t              *rs_dbe;
 595         stateid_t               rs_stateid;
 596         rfs4_openowner_t        *rs_owner;
 597         struct rfs4_file        *rs_finfo;
 598         uint32_t                rs_open_access;
 599         uint32_t                rs_open_deny;
 600         uint32_t                rs_share_access;
 601         uint32_t                rs_share_deny;
 602         unsigned                rs_closed:1;
 603         list_t                  rs_lostatelist;
 604         list_node_t             rs_node;
 605 } rfs4_state_t;
 606 
 607 /*
 608  * Lockowner - track the lockowner and its related info
 609  *
 610  * dbe - encapsulation struct
 611  * client - reference to the client
 612  * owner - lockowner supplied by the client
 613  * pid - local identifier used for file locking
 614  */
 615 typedef struct rfs4_lockowner {
 616         rfs4_dbe_t              *rl_dbe;
 617         rfs4_client_t           *rl_client;
 618         lock_owner4             rl_owner;
 619         pid_t                   rl_pid;
 620 } rfs4_lockowner_t;
 621 
 622 /*
 623  * Lockowner_state associated with a state struct and lockowner
 624  *
 625  * dbe - encapsulation struct
 626  * state - reference back to state struct for open file
 627  * lockid - stateid for this lockowner/state
 628  * locker - reference to lockowner
 629  * seqid - sequence id for this lockowner/state
 630  * skip_seqid_check - used on initialization of struct
 631  * locks_cleaned - have all locks been released for this lockowner/file?
 632  * lock_completed - successful LOCK with lockowner/file?
 633  * ls_sw - used to serialize update seqid/reply/stateid handling
 634  * node - node for state struct list of lo_states
 635  * reply - last reply sent in relation to this lockowner/state
 636  */
 637 typedef struct rfs4_lo_state {
 638         rfs4_dbe_t              *rls_dbe;
 639         rfs4_state_t            *rls_state;
 640         stateid_t               rls_lockid;
 641         rfs4_lockowner_t        *rls_locker;
 642         seqid4                  rls_seqid;
 643         unsigned                rls_skip_seqid_check:1;
 644         unsigned                rls_locks_cleaned:1;
 645         unsigned                rls_lock_completed:1;
 646         rfs4_state_wait_t       rls_sw;
 647         list_node_t             rls_node;
 648         nfs_resop4              rls_reply;
 649 } rfs4_lo_state_t;
 650 
 651 /*
 652  * Delegation state - per client
 653  *
 654  * dbe - encapsulation struct
 655  * dtype - type of delegation (NONE, READ, WRITE)
 656  * delegid - stateid for this delegation
 657  * time_granted - time this delegation was assigned to client
 658  * time_recalled - time when the server started recall process
 659  * time_revoked - if revoked, time that the revoke occurred
 660  * finfo - reference to the file associated with this delegation
 661  * client - reference to client for which this delegation is associated
 662  * node - list of delegations for the file (WRITE == 1, READ == )
 663  */
 664 typedef struct rfs4_deleg_state {
 665         rfs4_dbe_t              *rds_dbe;
 666         open_delegation_type4   rds_dtype;
 667         stateid_t               rds_delegid;
 668         time_t                  rds_time_granted;
 669         time_t                  rds_time_recalled;
 670         time_t                  rds_time_revoked;
 671         struct rfs4_file        *rds_finfo;
 672         rfs4_client_t           *rds_client;
 673         list_node_t             rds_node;
 674 } rfs4_deleg_state_t;
 675 
 676 /*
 677  * Delegation info associated with the file
 678  *
 679  * dtype - type of delegation for file (NONE, READ, WRITE)
 680  * time_returned - time that last delegation was returned for file
 681  * time_recalled - time that recall sequence started
 682  * time_lastgrant - time that last delegation was provided to a client
 683  * time_lastwrite - time of last write to use the delegation stateid
 684  * time_rm_delayed - time of last remove/rename which was DELAYed
 685  * rdgrants - how many read delegations have been provided for this file
 686  * wrgrants - how many write delegations provided (can only be one)
 687  * recall_count - how many recall threads are outstanding
 688  * recall_lock - lock to protect contents of this struct
 689  * recall_cv - condition var for the "parent" thread to wait upon
 690  * deleg_change_grant - value for change attribute at time of write grant
 691  * deleg_change - most recent value of change obtained from client
 692  * deleg_change_ts - time of last deleg_change update
 693  * ever_recalled - has this particular delegation ever been recalled?
 694  * dont_grant - file deletion is impending, don't grant a delegation
 695  * conflicted_client - clientid of the client that caused a CB_RECALL
 696  *      to occur. This is used for delegation policy (should a delegation
 697  *      be granted shortly after it has been returned?)
 698  */
 699 typedef struct rfs4_dinfo {
 700         open_delegation_type4 rd_dtype;
 701         time_t          rd_time_returned;
 702         time_t          rd_time_recalled;
 703         time_t          rd_time_lastgrant;
 704         time_t          rd_time_lastwrite;
 705         time_t          rd_time_rm_delayed;
 706         uint32_t        rd_rdgrants;
 707         uint32_t        rd_wrgrants;
 708         int32_t         rd_recall_count;
 709         kmutex_t        rd_recall_lock[1];
 710         kcondvar_t      rd_recall_cv[1];
 711         bool_t          rd_ever_recalled;
 712         uint32_t        rd_hold_grant;
 713         clientid4       rd_conflicted_client;
 714 } rfs4_dinfo_t;
 715 
 716 /*
 717  * File
 718  *
 719  * dbe - encapsulation struct
 720  * vp - vnode for the file that is open or has a delegation
 721  * filehandle - the filehandle generated by the server for this file
 722  * delegstatelist - root of delegation list for this file
 723  * dinfo - see struct definition above
 724  * share_deny - union of all deny modes on file
 725  * share_access - union of all access modes on file
 726  * access_read - count of read access
 727  * access_write - count of write access
 728  * deny_read - count of deny reads
 729  * deny_write - count of deny writes
 730  * file_rwlock - lock for serializing the removal of a file while
 731  *      the state structures are active within the server
 732  *
 733  *      The only requirement for locking file_rwlock is that the
 734  *      caller have a reference to the containing rfs4_file.  The dbe
 735  *      lock may or may not be held for lock/unlock of file_rwlock.
 736  *      As mentioned above, the file_rwlock is used for serialization
 737  *      of file removal and more specifically reference to the held
 738  *      vnode (e.g. vp).
 739  */
 740 typedef struct rfs4_file {
 741         rfs4_dbe_t      *rf_dbe;
 742         vnode_t         *rf_vp;
 743         nfs_fh4         rf_filehandle;
 744         list_t          rf_delegstatelist;
 745         rfs4_dinfo_t    rf_dinfo;
 746         uint32_t        rf_share_deny;
 747         uint32_t        rf_share_access;
 748         uint32_t        rf_access_read;
 749         uint32_t        rf_access_write;
 750         uint32_t        rf_deny_read;
 751         uint32_t        rf_deny_write;
 752         krwlock_t       rf_file_rwlock;
 753 } rfs4_file_t;
 754 
 755 /*
 756  * nfs4_deleg_policy is used to signify the server's global delegation
 757  * policy.  The default is to NEVER delegate files and the
 758  * administrator must configure the server to enable delegations.
 759  *
 760  * The disable/enable delegation functions are used to eliminate a
 761  * race with exclusive creates.
 762  */
 763 typedef enum {
 764         SRV_NEVER_DELEGATE = 0,
 765         SRV_NORMAL_DELEGATE = 1
 766 } srv_deleg_policy_t;
 767 
 768 extern void rfs4_disable_delegation(void), rfs4_enable_delegation(void);
 769 
 770 /*
 771  * Request types for delegation. These correspond with
 772  * open_delegation_type4 with the addition of a new value, DELEG_ANY,
 773  * to reqequest any delegation.
 774  */
 775 typedef enum {
 776         DELEG_NONE = 0,         /* Corresponds to OPEN_DELEG_NONE */
 777         DELEG_READ = 1,         /* Corresponds to OPEN_DELEG_READ */
 778         DELEG_WRITE = 2,        /* Corresponds to OPEN_DELEG_WRITE */
 779         DELEG_ANY = -1          /* New value to request any delegation type */
 780 } delegreq_t;
 781 
 782 #define NFS4_DELEG4TYPE2REQTYPE(x) (delegreq_t)(x)
 783 
 784 /*
 785  * Zone global variables of NFSv4 server
 786  */
 787 typedef struct nfs4_srv {
 788         /* Unique write verifier */
 789         verifier4       write4verf;
 790         /* Delegation lock */
 791         kmutex_t        deleg_lock;
 792         /* Used to serialize create/destroy of nfs4_server_state database */
 793         kmutex_t        state_lock;
 794         rfs4_database_t *nfs4_server_state;
 795         /* Used to manage access to server instance linked list */
 796         kmutex_t        servinst_lock;
 797         rfs4_servinst_t *nfs4_cur_servinst;
 798         /* Used to manage access to nfs4_deleg_policy */
 799         krwlock_t       deleg_policy_lock;
 800         srv_deleg_policy_t nfs4_deleg_policy;
 801         /* Set first time we see one */
 802         int             seen_first_compound;
 803         /*
 804          * Circular double-linked list of paths for currently-served RGs.
 805          * No locking required -- only changed on server start.
 806          * Managed with insque/remque.
 807          */
 808         rfs4_dss_path_t *dss_pathlist;
 809         /* Duplicate request cache */
 810         rfs4_drc_t      *nfs4_drc;
 811         /* nfsv4 server start time */
 812         time_t rfs4_start_time;
 813         /* Used to serialize lookups of clientids */
 814         krwlock_t rfs4_findclient_lock;
 815 
 816         /* NFSv4 server state client tables */
 817         /* table expiry times */
 818         time_t rfs4_client_cache_time;
 819         time_t rfs4_openowner_cache_time;
 820         time_t rfs4_state_cache_time;
 821         time_t rfs4_lo_state_cache_time;
 822         time_t rfs4_lockowner_cache_time;
 823         time_t rfs4_file_cache_time;
 824         time_t rfs4_deleg_state_cache_time;
 825         time_t rfs4_clntip_cache_time;
 826         /* tables and indexes */
 827         /* client table */
 828         rfs4_table_t *rfs4_client_tab;
 829         rfs4_index_t *rfs4_clientid_idx;
 830         rfs4_index_t *rfs4_nfsclnt_idx;
 831         /* client IP table */
 832         rfs4_table_t *rfs4_clntip_tab;
 833         rfs4_index_t *rfs4_clntip_idx;
 834         /* Open Owner table */
 835         rfs4_table_t *rfs4_openowner_tab;
 836         rfs4_index_t *rfs4_openowner_idx;
 837         /* Open State ID table */
 838         rfs4_table_t *rfs4_state_tab;
 839         rfs4_index_t *rfs4_state_idx;
 840         rfs4_index_t *rfs4_state_owner_file_idx;
 841         rfs4_index_t *rfs4_state_file_idx;
 842         /* Lock State ID table */
 843         rfs4_table_t *rfs4_lo_state_tab;
 844         rfs4_index_t *rfs4_lo_state_idx;
 845         rfs4_index_t *rfs4_lo_state_owner_idx;
 846         /* Lock owner table */
 847         rfs4_table_t *rfs4_lockowner_tab;
 848         rfs4_index_t *rfs4_lockowner_idx;
 849         rfs4_index_t *rfs4_lockowner_pid_idx;
 850         /* File table */
 851         rfs4_table_t *rfs4_file_tab;
 852         rfs4_index_t *rfs4_file_idx;
 853         /* Deleg State table */
 854         rfs4_table_t *rfs4_deleg_state_tab;
 855         rfs4_index_t *rfs4_deleg_idx;
 856         rfs4_index_t *rfs4_deleg_state_idx;
 857 
 858         /* client stable storage */
 859         int rfs4_ss_enabled;
 860 } nfs4_srv_t;
 861 
 862 /*
 863  * max length of the NFSv4 server database name
 864  */
 865 #define RFS4_MAX_MEM_CACHE_NAME 48
 866 
 867 /*
 868  * global NFSv4 server kmem caches
 869  * r_db_name - The name of the state database and the table that will use it
 870  *             These tables are defined in nfs4_srv_t
 871  * r_db_mem_cache - The kmem cache associated with the state database name
 872  */
 873 typedef struct rfs4_db_mem_cache {
 874         char            r_db_name[RFS4_MAX_MEM_CACHE_NAME];
 875         kmem_cache_t    *r_db_mem_cache;
 876 } rfs4_db_mem_cache_t;
 877 
 878 #define RFS4_DB_MEM_CACHE_NUM 8
 879 
 880 rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM];
 881 
 882 
 883 extern srv_deleg_policy_t nfs4_get_deleg_policy();
 884 
 885 extern void             rfs4_servinst_create(nfs4_srv_t *, int, int, char **);
 886 extern void             rfs4_servinst_destroy_all(nfs4_srv_t *);
 887 extern void             rfs4_servinst_assign(nfs4_srv_t *, rfs4_client_t *,
 888                             rfs4_servinst_t *);
 889 extern rfs4_servinst_t  *rfs4_servinst(rfs4_client_t *);
 890 extern int              rfs4_clnt_in_grace(rfs4_client_t *);
 891 extern int              rfs4_servinst_in_grace(rfs4_servinst_t *);
 892 extern int              rfs4_servinst_grace_new(rfs4_servinst_t *);
 893 extern void             rfs4_grace_start(rfs4_servinst_t *);
 894 extern void             rfs4_grace_start_new(nfs4_srv_t *);
 895 extern void             rfs4_grace_reset_all(nfs4_srv_t *);
 896 extern void             rfs4_ss_oldstate(rfs4_oldstate_t *, char *, char *);
 897 extern void             rfs4_dss_readstate(nfs4_srv_t *, int, char **);
 898 
 899 /*
 900  * Various interfaces to manipulate the state structures introduced
 901  * above
 902  */
 903 extern  void            rfs4_free_reply(nfs_resop4 *);
 904 extern  void            rfs4_copy_reply(nfs_resop4 *, nfs_resop4 *);
 905 
 906 /* rfs4_client_t handling */
 907 extern  rfs4_client_t   *rfs4_findclient(nfs_client_id4 *,
 908                                         bool_t *, rfs4_client_t *);
 909 extern  rfs4_client_t   *rfs4_findclient_by_id(clientid4, bool_t);
 910 extern  rfs4_client_t   *rfs4_findclient_by_addr(struct sockaddr *);
 911 extern  void            rfs4_client_rele(rfs4_client_t *);
 912 extern  void            rfs4_client_close(rfs4_client_t *);
 913 extern  void            rfs4_client_state_remove(rfs4_client_t *);
 914 extern  void            rfs4_client_scv_next(rfs4_client_t *);
 915 extern  void            rfs4_update_lease(rfs4_client_t *);
 916 extern  bool_t          rfs4_lease_expired(rfs4_client_t *);
 917 extern  nfsstat4        rfs4_check_clientid(clientid4 *, int);
 918 
 919 /* rfs4_clntip_t handling */
 920 extern  rfs4_clntip_t   *rfs4_find_clntip(struct sockaddr *, bool_t *);
 921 extern  void            rfs4_invalidate_clntip(struct sockaddr *);
 922 
 923 /* rfs4_openowner_t handling */
 924 extern  rfs4_openowner_t *rfs4_findopenowner(open_owner4 *, bool_t *, seqid4);
 925 extern  void            rfs4_update_open_sequence(rfs4_openowner_t *);
 926 extern  void            rfs4_update_open_resp(rfs4_openowner_t *,
 927                                         nfs_resop4 *, nfs_fh4 *);
 928 extern  void            rfs4_openowner_rele(rfs4_openowner_t *);
 929 extern  void            rfs4_free_opens(rfs4_openowner_t *, bool_t, bool_t);
 930 
 931 /* rfs4_lockowner_t handling */
 932 extern  rfs4_lockowner_t *rfs4_findlockowner(lock_owner4 *, bool_t *);
 933 extern  rfs4_lockowner_t *rfs4_findlockowner_by_pid(pid_t);
 934 extern  void            rfs4_lockowner_rele(rfs4_lockowner_t *);
 935 
 936 /* rfs4_state_t handling */
 937 extern  rfs4_state_t    *rfs4_findstate_by_owner_file(rfs4_openowner_t *,
 938                                         rfs4_file_t *, bool_t *);
 939 extern  void            rfs4_state_rele(rfs4_state_t *);
 940 extern  void            rfs4_state_close(rfs4_state_t *, bool_t,
 941                                         bool_t, cred_t *);
 942 extern  void            rfs4_release_share_lock_state(rfs4_state_t *,
 943                                         cred_t *, bool_t);
 944 extern  void            rfs4_close_all_state(rfs4_file_t *);
 945 
 946 /* rfs4_lo_state_t handling */
 947 extern  rfs4_lo_state_t *rfs4_findlo_state_by_owner(rfs4_lockowner_t *,
 948                                                 rfs4_state_t *, bool_t *);
 949 extern  void            rfs4_lo_state_rele(rfs4_lo_state_t *, bool_t);
 950 extern  void            rfs4_update_lock_sequence(rfs4_lo_state_t *);
 951 extern  void            rfs4_update_lock_resp(rfs4_lo_state_t *,
 952                                         nfs_resop4 *);
 953 
 954 /* rfs4_file_t handling */
 955 extern  rfs4_file_t     *rfs4_findfile(vnode_t *, nfs_fh4 *, bool_t *);
 956 extern  rfs4_file_t     *rfs4_findfile_withlock(vnode_t *, nfs_fh4 *,
 957                                                 bool_t *);
 958 extern  void            rfs4_file_rele(rfs4_file_t *);
 959 
 960 /* General collection of "get state" functions */
 961 extern  nfsstat4        rfs4_get_state(stateid4 *, rfs4_state_t **,
 962                                         rfs4_dbsearch_type_t);
 963 extern  nfsstat4        rfs4_get_deleg_state(stateid4 *,
 964                                         rfs4_deleg_state_t **);
 965 extern  nfsstat4        rfs4_get_lo_state(stateid4 *, rfs4_lo_state_t **,
 966                                         bool_t);
 967 extern  nfsstat4        rfs4_check_stateid(int, vnode_t *, stateid4 *,
 968                                         bool_t, bool_t *, bool_t,
 969                                         caller_context_t *);
 970 extern  int             rfs4_check_stateid_seqid(rfs4_state_t *, stateid4 *);
 971 extern  int             rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *,
 972                                         stateid4 *);
 973 
 974 /* return values for rfs4_check_stateid_seqid() */
 975 #define NFS4_CHECK_STATEID_OKAY 1
 976 #define NFS4_CHECK_STATEID_OLD  2
 977 #define NFS4_CHECK_STATEID_BAD  3
 978 #define NFS4_CHECK_STATEID_EXPIRED      4
 979 #define NFS4_CHECK_STATEID_REPLAY       5
 980 #define NFS4_CHECK_STATEID_CLOSED       6
 981 #define NFS4_CHECK_STATEID_UNCONFIRMED  7
 982 
 983 /* delay() time that server is willing to briefly wait for a delegreturn */
 984 #define NFS4_DELEGATION_CONFLICT_DELAY  (hz/10)
 985 
 986 /*
 987  * Interfaces for handling of callback's client handle cache and
 988  * callback interfaces themselves.
 989  */
 990 extern  void            rfs4_cbinfo_free(rfs4_cbinfo_t *);
 991 extern  void            rfs4_client_setcb(rfs4_client_t *, cb_client4 *,
 992                                         uint32_t);
 993 extern  void            rfs4_deleg_cb_check(rfs4_client_t *);
 994 extern  nfsstat4        rfs4_vop_getattr(vnode_t *, vattr_t *, int, cred_t *);
 995 
 996 /* rfs4_deleg_state_t handling and other delegation interfaces */
 997 extern  rfs4_deleg_state_t *rfs4_finddeleg(rfs4_state_t *, bool_t *);
 998 extern  rfs4_deleg_state_t *rfs4_finddelegstate(stateid_t *);
 999 extern  bool_t          rfs4_check_recall(rfs4_state_t *, uint32_t);
1000 extern  void            rfs4_recall_deleg(rfs4_file_t *,
1001                                 bool_t, rfs4_client_t *);
1002 extern  int             rfs4_get_deleg(rfs4_state_t *,  open_delegation_type4,
1003                         open_delegation_type4 (*policy)(rfs4_state_t *,
1004                                 open_delegation_type4 dtype));
1005 extern  rfs4_deleg_state_t *rfs4_grant_delegation(delegreq_t, rfs4_state_t *,
1006                                 int *);
1007 extern  void            rfs4_set_deleg_response(rfs4_deleg_state_t *,
1008                                 open_delegation4 *, nfsace4 *, int);
1009 extern  void            rfs4_return_deleg(rfs4_deleg_state_t *, bool_t);
1010 extern  bool_t          rfs4_is_deleg(rfs4_state_t *);
1011 extern  void            rfs4_deleg_state_rele(rfs4_deleg_state_t *);
1012 extern  bool_t          rfs4_check_delegated_byfp(int, rfs4_file_t *,
1013                                         bool_t, bool_t, bool_t, clientid4 *);
1014 extern  void            rfs4_clear_dont_grant(rfs4_file_t *);
1015 
1016 /*
1017  * nfs4 monitored operations.
1018  */
1019 extern int deleg_rd_open(femarg_t *, int, cred_t *, caller_context_t *);
1020 extern int deleg_wr_open(femarg_t *, int, cred_t *, caller_context_t *);
1021 extern int deleg_wr_read(femarg_t *, uio_t *, int, cred_t *,
1022             caller_context_t *);
1023 extern int deleg_rd_write(femarg_t *, uio_t *, int, cred_t *,
1024             caller_context_t *);
1025 extern int deleg_wr_write(femarg_t *, uio_t *, int, cred_t *,
1026             caller_context_t *);
1027 extern int deleg_rd_setattr(femarg_t *, vattr_t *, int, cred_t *,
1028                 caller_context_t *);
1029 extern int deleg_wr_setattr(femarg_t *, vattr_t *, int, cred_t *,
1030                 caller_context_t *);
1031 extern int deleg_rd_rwlock(femarg_t *, int, caller_context_t *);
1032 extern int deleg_wr_rwlock(femarg_t *, int, caller_context_t *);
1033 extern int deleg_rd_space(femarg_t *, int, flock64_t *, int, offset_t, cred_t *,
1034                 caller_context_t *);
1035 extern int deleg_wr_space(femarg_t *, int, flock64_t *, int, offset_t, cred_t *,
1036                 caller_context_t *);
1037 extern int deleg_rd_setsecattr(femarg_t *, vsecattr_t *, int, cred_t *,
1038                 caller_context_t *);
1039 extern int deleg_wr_setsecattr(femarg_t *, vsecattr_t *, int, cred_t *,
1040                 caller_context_t *);
1041 extern int deleg_rd_vnevent(femarg_t *, vnevent_t, vnode_t *, char *,
1042                 caller_context_t *);
1043 extern int deleg_wr_vnevent(femarg_t *, vnevent_t, vnode_t *, char *,
1044                 caller_context_t *);
1045 
1046 extern void rfs4_mon_hold(void *);
1047 extern void rfs4_mon_rele(void *);
1048 
1049 extern fem_t    *deleg_rdops;
1050 extern fem_t    *deleg_wrops;
1051 
1052 extern int rfs4_share(rfs4_state_t *, uint32_t, uint32_t);
1053 extern int rfs4_unshare(rfs4_state_t *);
1054 extern void rfs4_set_deleg_policy(nfs4_srv_t *, srv_deleg_policy_t);
1055 extern void rfs4_hold_deleg_policy(nfs4_srv_t *);
1056 extern void rfs4_rele_deleg_policy(nfs4_srv_t *);
1057 
#ifdef DEBUG
/*
 * NFS4_DEBUG(var, args): when `var' is non-zero, call cmn_err() with
 * the parenthesised argument list `args'.  Use as a statement followed
 * by a semicolon.
 *
 * The body is wrapped in do { } while (0) so the macro is a single
 * statement: a bare `if' would capture the `else' of an enclosing
 * if/else that uses the macro as its unbraced body.
 */
#define	NFS4_DEBUG(var, args)	do { if (var) cmn_err args; } while (0)

/* Debug switches; tested as the `var' argument of NFS4_DEBUG */
extern int rfs4_debug;
extern int nfs4_client_attr_debug;
extern int nfs4_client_state_debug;
extern int nfs4_client_shadow_debug;
extern int nfs4_client_lock_debug;
extern int nfs4_client_lease_debug;
extern int nfs4_seqid_sync;
extern int nfs4_client_map_debug;
extern int nfs4_client_inactive_debug;
extern int nfs4_client_recov_debug;
extern int nfs4_client_failover_debug;
extern int nfs4_client_call_debug;
extern int nfs4_client_foo_debug;
extern int nfs4_client_zone_debug;
extern int nfs4_lost_rqst_debug;
extern int nfs4_open_stream_debug;
extern int nfs4_client_open_dg;
extern int nfs4_srvmnt_debug;
extern int nfs4_utf8_debug;

void rfs4_dbe_debug(rfs4_dbe_t *e);

#ifdef NFS4_DEBUG_MUTEX
void nfs4_debug_mutex_enter(kmutex_t *, char *, int);
void nfs4_debug_mutex_exit(kmutex_t *, char *, int);

/*
 * Redirect mutex_enter/mutex_exit to the debug wrappers, passing the
 * call site's file name and line number.
 */
#define	mutex_enter(m)	nfs4_debug_mutex_enter((m), __FILE__, __LINE__)
#define	mutex_exit(m)	nfs4_debug_mutex_exit((m), __FILE__, __LINE__)
#endif /* NFS4_DEBUG_MUTEX */

#else  /* ! DEBUG */
/* Non-DEBUG builds: NFS4_DEBUG expands to nothing */
#define	NFS4_DEBUG(var, args)
#endif /* DEBUG */
1094 
1095 /*
1096  * XXX - temporary for testing of volatile fh
1097  */
1098 
1099 #ifdef VOLATILE_FH_TEST
1100 
1101 struct nfs_fh4_fmt {
1102         fhandle4_t      fh4_i;
1103         uint32_t        fh4_flag;
1104         uint32_t        fh4_volatile_id;
1105 };
1106 
1107 #else /* VOLATILE_FH_TEST */
1108 
1109 struct nfs_fh4_fmt {
1110         fhandle4_t      fh4_i;
1111         uint32_t        fh4_flag;
1112 };
1113 
1114 #endif /* VOLATILE_FH_TEST */
1115 
1116 #define FH4_NAMEDATTR   1
1117 #define FH4_ATTRDIR     2
1118 
1119 #define fh4_fsid        fh4_i.fhx_fsid
1120 #define fh4_len         fh4_i.fhx_len   /* fid length */
1121 #define fh4_data        fh4_i.fhx_data  /* fid bytes */
1122 #define fh4_xlen        fh4_i.fhx_xlen
1123 #define fh4_xdata       fh4_i.fhx_xdata
1124 typedef struct nfs_fh4_fmt nfs_fh4_fmt_t;
1125 
1126 #define fh4_to_fmt4(fh4p) ((nfs_fh4_fmt_t *)(fh4p)->nfs_fh4_val)
1127 #define get_fh4_flag(fh4p, flag) ((fh4_to_fmt4(fh4p)->fh4_flag) & (flag))
1128 #define set_fh4_flag(fh4p, flag) ((fh4_to_fmt4(fh4p)->fh4_flag) |= (flag))
1129 #define clr_fh4_flag(fh4p, flag) ((fh4_to_fmt4(fh4p)->fh4_flag) &= ~(flag))
1130 
1131 #define NFS_FH4_LEN     sizeof (nfs_fh4_fmt_t)
1132 
/*
 * Copy fields from external (fhandle_t) to in-memory (nfs_fh4_fmt_t)
 * format to support export info checking.  Only the fsid, xlen and
 * xdata are copied, not the complete filehandle; everything else in
 * the destination is zeroed first.  It may need to be changed to be
 * used in other places.
 *
 * NOTE: The macro expects the space to be pre-allocated for the
 * contents of nfs_fh4_fmt_t.
 *
 * NOTE(review): both arguments are evaluated more than once, and the
 * expansion is a brace block rather than do { } while (0), so it is
 * not safe as the unbraced body of an if/else.  fh_xlen bytes are
 * copied without a bound check against the destination - confirm
 * callers guarantee it fits.
 */
#define	FH_TO_FMT4(exifh, nfs_fmt) {					\
	bzero((nfs_fmt), NFS_FH4_LEN);					\
	(nfs_fmt)->fh4_fsid = (exifh)->fh_fsid;				\
	(nfs_fmt)->fh4_xlen = (exifh)->fh_xlen;				\
	bcopy((exifh)->fh_xdata, (nfs_fmt)->fh4_xdata,			\
	    (exifh)->fh_xlen);						\
}
1149 
/*
 * A few definitions of repeatedly used constructs for nfsv4
 *
 * UTF8STRING_FREE(str): hand str.utf8string_val (utf8string_len bytes)
 * to kmem_free(), then reset the value pointer to NULL and the length
 * to 0.  `str' is the structure itself, not a pointer to it, and is
 * evaluated several times.
 */
#define	UTF8STRING_FREE(str) {						\
	kmem_free((str).utf8string_val, (str).utf8string_len);		\
	(str).utf8string_val = NULL;					\
	(str).utf8string_len = 0;					\
}
1158 
/*
 * NFS4_VOLATILE_FH(mi) is non-zero when the filesystem uses
 * non-persistent filehandles, i.e. when mi->mi_fh_expire_type has any
 * of FH4_VOLATILE_ANY, FH4_VOL_MIGRATION or FH4_VOL_RENAME set.
 */
#define	NFS4_VOLATILE_FH(mi)						\
	((mi)->mi_fh_expire_type &					\
	(FH4_VOLATILE_ANY | FH4_VOL_MIGRATION | FH4_VOL_RENAME))
1166 
/*
 * NFS_IS_DOTNAME(name) is true when `name' is exactly "." or "..".
 * `name' is evaluated more than once.
 */
#define	NFS_IS_DOTNAME(name)						\
	((name)[0] == '.' &&						\
	((name)[1] == '\0' ||						\
	((name)[1] == '.' && (name)[2] == '\0')))
1173 
/*
 * Number of bits in one bitmap word (a uint32_t)
 */
#define	NFS4_BITMAP4_BITSPERWORD	(8 * sizeof (uint32_t))
1178 
/*
 * Values for the `access' field of struct compound_state, set from
 * the result of nfsauth access checking.
 */
#define	CS_ACCESS_OK		0x01
#define	CS_ACCESS_DENIED	0x02
#define	CS_ACCESS_LIMITED	0x04
1186 
1187 /*
1188  * compound state in nfsv4 server
1189  */
1190 struct compound_state {
1191         struct exportinfo *exi;
1192         struct exportinfo *saved_exi;   /* export struct for saved_vp */
1193         cred_t          *basecr;        /* UNIX cred:  only RPC request */
1194         caddr_t         principal;
1195         int             nfsflavor;
1196         cred_t          *cr;            /* UNIX cred: RPC request and */
1197                                         /* target export */
1198         bool_t          cont;
1199         uint_t          access;         /* access perm on vp per request */
1200         bool_t          deleg;          /* TRUE if current fh has */
1201                                         /* write delegated */
1202         vnode_t         *vp;            /* modified by PUTFH, and by ops that */
1203                                         /* input to GETFH */
1204         bool_t          mandlock;       /* Is mandatory locking in effect */
1205                                         /* for vp */
1206         vnode_t         *saved_vp;      /* modified by SAVEFH, copied to */
1207                                         /* vp by RESTOREFH */
1208         nfsstat4        *statusp;
1209         nfs_fh4         fh;             /* ditto. valid only if vp != NULL */
1210         nfs_fh4         saved_fh;       /* ditto. valid only if */
1211                                         /*      saved_vp != NULL */
1212         struct svc_req  *req;
1213         char            fhbuf[NFS4_FHSIZE];
1214 };
1215 
/*
 * Conversion commands for nfsv4 server attr checking
 */
typedef enum nfs4_attr_cmd {
	/* check which attrs supported */
	NFS4ATTR_SUPPORTED = 0,
	/* getattr - sys to fattr4 (r) */
	NFS4ATTR_GETIT = 1,
	/* setattr - fattr4 to sys (w) */
	NFS4ATTR_SETIT = 2,
	/* verify - fattr4 to sys (r) */
	NFS4ATTR_VERIT = 3,
	/* free any alloc'd space for attr */
	NFS4ATTR_FREEIT = 4
} nfs4_attr_cmd_t;
1228 
1229 struct nfs4_svgetit_arg {
1230         nfs4_attr_cmd_t op;             /* getit or setit */
1231         struct compound_state *cs;
1232         struct statvfs64 *sbp;
1233         uint_t          flag;           /* VOP_GETATTR/VOP_SETATTR flag */
1234         uint_t          xattr;          /* object is xattr */
1235         bool_t          rdattr_error_req; /* if readdir & client wants */
1236                                                 /* rdattr_error */
1237         nfsstat4        rdattr_error;   /* used for per-entry status */
1238                                         /* (if rdattr_err) */
1239         bool_t          is_referral;    /* because sometimes we tell lies */
1240         bool_t          mntdfid_set;
1241         fattr4_mounted_on_fileid
1242                         mounted_on_fileid;
1243                                         /* readdir op can always return */
1244                                         /* d_ino from server fs dirent  */
1245                                         /* for mounted_on_fileid attr.  */
1246                                         /* This field holds d_ino so    */
1247                                         /* srv attr conv code can avoid */
1248                                         /* doing an untraverse.         */
1249         vattr_t         vap[1];
1250 };
1251 
1252 struct nfs4_ntov_map {
1253         bitmap4         fbit;           /* FATTR4_XXX_MASKY */
1254         uint_t          vbit;           /* AT_XXX */
1255         bool_t          vfsstat;
1256         bool_t          mandatory;      /* attribute mandatory to implement? */
1257         uint_t          nval;
1258         int             xdr_size;       /* Size of XDR'd attr */
1259         xdrproc_t       xfunc;
1260         int (*sv_getit)(nfs4_attr_cmd_t, struct nfs4_svgetit_arg *,
1261                 union nfs4_attr_u *);   /* subroutine for getting attr. */
1262         char            *prtstr;        /* string attr for printing */
1263 };
1264 
1265 struct nfs4attr_to_vattr {
1266         vnode_t         *vp;
1267         vattr_t         *vap;
1268         nfs_fh4         *fhp;
1269         nfsstat4        rdattr_error;
1270         uint32_t        flag;
1271         fattr4_change   change;
1272         fattr4_fsid     srv_fsid;
1273         fattr4_mounted_on_fileid        mntd_fid;
1274 };
1275 
1276 typedef struct nfs4attr_to_vattr ntov4_t;
1277 
/*
 * nfs4attr_to_vattr flags (bit values for ntov4_t.flag)
 */
#define	NTOV_FHP_VALID			0x01
#define	NTOV_RDATTR_ERROR_VALID		0x02
#define	NTOV_CHANGE_VALID		0x04
#define	NTOV_SUPP_VALID			0x08
#define	NTOV_SRV_FSID_VALID		0x10
#define	NTOV_MOUNTED_ON_FILEID_VALID	0x20
1287 
1288 
/*
 * Bitwise OR of the FATTR4_*_MASK values listed below (going by the
 * name, the mandatory attribute set).
 */
#define	FATTR4_MANDATTR_MASK (						\
	FATTR4_SUPPORTED_ATTRS_MASK |					\
	FATTR4_TYPE_MASK |						\
	FATTR4_FH_EXPIRE_TYPE_MASK |					\
	FATTR4_CHANGE_MASK |						\
	FATTR4_SIZE_MASK |						\
	FATTR4_LINK_SUPPORT_MASK |					\
	FATTR4_SYMLINK_SUPPORT_MASK |					\
	FATTR4_NAMED_ATTR_MASK |					\
	FATTR4_FSID_MASK |						\
	FATTR4_UNIQUE_HANDLES_MASK |					\
	FATTR4_LEASE_TIME_MASK |					\
	FATTR4_RDATTR_ERROR_MASK |					\
	FATTR4_FILEHANDLE_MASK)
1303 
1304 
1305 struct nfs4attr_to_osattr {
1306         void *attrconv_arg;
1307         uint_t mask;
1308 };
1309 
1310 struct mntinfo4;
1311 
/*
 * lkp4_attr_setup lists the different options for attributes when calling
 * nfs4lookup_setup - either no attributes (just lookups - e.g., secinfo),
 * one component only (normal component lookup), get attributes for the
 * last component (e.g., mount), attributes for each component (e.g.,
 * failovers later), just the filehandle for the last component (e.g.,
 * volatile filehandle recovery), or stuff that needs OPENATTR (e.g.
 * looking up a named attribute or its hidden directory).
 *
 * NOTE(review): only the five values below are defined; the
 * description above mentions options that have no enumerator here.
 */
enum lkp4_attr_setup {
	/* no attrs or filehandles */
	LKP4_NO_ATTRIBUTES = 0,
	/* multi-comp: attrs for all comps */
	LKP4_ALL_ATTRIBUTES = 3,
	/* multi-comp: named attr & attrdir */
	LKP4_LAST_NAMED_ATTR = 5,
	/* multi-comp: just attrdir */
	LKP4_LAST_ATTRDIR = 6,
	/* multi-comp: attrs for all comp and secinfo for last comp */
	LKP4_ALL_ATTR_SECINFO = 7
};
1329 
1330 /*
1331  * lookup4_param a set of parameters to nfs4lookup_setup -
1332  * used to setup a path lookup compound request.
1333  */
1334 typedef struct lookup4_param {
1335         enum lkp4_attr_setup l4_getattrs; /* (in) get attrs in the lookup? */
1336         int             header_len;     /* (in) num ops before first lookup  */
1337         int             trailer_len;    /* (in) num ops after last      */
1338                                         /*      Lookup/Getattr          */
1339         bitmap4         ga_bits;        /* (in) Which attributes for Getattr */
1340         COMPOUND4args_clnt *argsp;      /* (in/out) args for compound struct */
1341         COMPOUND4res_clnt  *resp;       /* (in/out) res for compound  struct */
1342         int             arglen;         /* (out) argop buffer alloc'd length */
1343         struct mntinfo4 *mi;
1344 } lookup4_param_t;
1345 
1346 
/*
 * fattr4 index indicating finish.  Parenthesised so the negative
 * constant stays a single operand wherever the macro is expanded.
 */
#define	NFS4_FATTR4_FINISH	(-1)

/*
 * Signature of an attribute conversion routine: an int, the attribute
 * value union, and the conversion argument block.
 */
typedef int (*nfs4attr_to_os_t)(int, union nfs4_attr_u *,
    struct nfs4attr_to_osattr *);
1351 
1352 /*
1353  * The nfs4_error_t is the basic structure to return error values
1354  * from rfs4call.  It encapsulates the unix errno
1355  * value, the nfsstat4 value and the rpc status value into a single
1356  * structure.
1357  *
1358  * If error is set, then stat is ignored and rpc_status may be
1359  * set if the error occurred as the result of a CLNT_CALL.  If
1360  * stat is set, then rpc request succeeded, error and
1361  * rpc_status are set to 0 and stat contains the result of
1362  * operation, NFS4_OK or one of the NFS4ERR_* values.
1363  *
1364  * Functions which want to generate errors independently from
1365  * rfs4call should set error to the desired errno value and
1366  * set stat and rpc_status to 0.  nfs4_error_init() is a
1367  * convenient function to do this.
1368  */
1369 typedef struct {
1370         int             error;
1371         nfsstat4        stat;
1372         enum clnt_stat  rpc_status;
1373 } nfs4_error_t;
1374 
1375 /*
1376  * Shared functions
      *
      * Only prototypes appear here.  nfs4_getattr_otw_norecovery() returns
      * void and takes an nfs4_error_t *, whereas nfs4_getattr_otw() returns
      * int.  nfs4cmpfh() takes const nfs_fh4 pointers; nfs4cmpfhandle() takes
      * non-const nfs4_fhandle_t pointers.
1377  */
1378 extern void     rfs4_op_readdir(nfs_argop4 *, nfs_resop4 *,
1379                         struct svc_req *, struct compound_state *);
1380 extern void     nfs_fh4_copy(nfs_fh4 *, nfs_fh4 *);
1381 
1382 extern void     nfs4_fattr4_free(fattr4 *);
1383 
1384 extern int      nfs4lookup_setup(char *, lookup4_param_t *, int);
1385 extern void     nfs4_getattr_otw_norecovery(vnode_t *,
1386                         nfs4_ga_res_t *, nfs4_error_t *, cred_t *, int);
1387 extern int      nfs4_getattr_otw(vnode_t *, nfs4_ga_res_t *, cred_t *, int);
1388 extern int      nfs4cmpfh(const nfs_fh4 *, const nfs_fh4 *);
1389 extern int      nfs4cmpfhandle(nfs4_fhandle_t *, nfs4_fhandle_t *);
1390 extern int      nfs4getattr(vnode_t *, struct vattr *, cred_t *);
1391 extern int      nfs4_waitfor_purge_complete(vnode_t *);
1392 extern int      nfs4_validate_caches(vnode_t *, cred_t *);
1393 extern int      nfs4init(int, char *);
1394 extern void     nfs4fini(void);
1395 extern int      nfs4_vfsinit(void);
1396 extern void     nfs4_vfsfini(void);
1397 
     /*
      * Mostly init/fini entry points, plus nfs_idmap_flush() and
      * nfs4_acl_free_cache().  The return types are not uniform: the vnops,
      * idmap and acl routines return void, while the rnode, shadow, acache
      * and subr init/fini pairs return int.  No fini counterpart to
      * nfs4_acl_init() is declared in this part of the header.
      */
1398 extern void     nfs4_vnops_init(void);
1399 extern void     nfs4_vnops_fini(void);
1400 extern void     nfs_idmap_init(void);
1401 extern void     nfs_idmap_flush(int);
1402 extern void     nfs_idmap_fini(void);
1403 extern int      nfs4_rnode_init(void);
1404 extern int      nfs4_rnode_fini(void);
1405 extern int      nfs4_shadow_init(void);
1406 extern int      nfs4_shadow_fini(void);
1407 extern int      nfs4_acache_init(void);
1408 extern int      nfs4_acache_fini(void);
1409 extern int      nfs4_subr_init(void);
1410 extern int      nfs4_subr_fini(void);
1411 extern void     nfs4_acl_init(void);
1412 extern void     nfs4_acl_free_cache(vsecattr_t *);
1413 
     /*
      * geterrno4() takes an nfsstat4 and returns an int; puterrno4() goes
      * the other way (int in, nfsstat4 out).  nfs4tsize(void) is a separate
      * function from nfs4_tsize(struct knetconfig *), which is declared
      * further down.  Parameter names (u8s, gid) appear on only some of the
      * nfs_idmap_* prototypes, and utf8_strchr() marks its by-value char
      * parameter const; neither affects callers.
      */
1414 extern int      geterrno4(nfsstat4);
1415 extern nfsstat4 puterrno4(int);
1416 extern int      nfs4_need_to_bump_seqid(COMPOUND4res_clnt *);
1417 extern int      nfs4tsize(void);
1418 extern int      checkauth4(struct compound_state *, struct svc_req *);
1419 extern nfsstat4 call_checkauth4(struct compound_state *, struct svc_req *);
1420 extern int      is_exported_sec(int, struct exportinfo *);
1421 extern void     nfs4_vmask_to_nmask(uint_t, bitmap4 *);
1422 extern void     nfs4_vmask_to_nmask_set(uint_t, bitmap4 *);
1423 extern int      nfs_idmap_str_uid(utf8string *u8s, uid_t *, bool_t);
1424 extern int      nfs_idmap_str_gid(utf8string *u8s, gid_t *, bool_t);
1425 extern int      nfs_idmap_uid_str(uid_t, utf8string *u8s, bool_t);
1426 extern int      nfs_idmap_gid_str(gid_t gid, utf8string *u8s, bool_t);
1427 extern int      nfs4_time_ntov(nfstime4 *, timestruc_t *);
1428 extern int      nfs4_time_vton(timestruc_t *, nfstime4 *);
1429 extern char     *utf8_to_str(utf8string *, uint_t *, char *);
1430 extern char     *utf8_to_fn(utf8string *, uint_t *, char *);
1431 extern utf8string *str_to_utf8(char *, utf8string *);
1432 extern utf8string *utf8_copy(utf8string *, utf8string *);
1433 extern int      utf8_compare(const utf8string *, const utf8string *);
1434 extern nfsstat4 utf8_dir_verify(utf8string *);
1435 extern char     *utf8_strchr(utf8string *, const char);
1436 extern int      ln_ace4_cmp(nfsace4 *, nfsace4 *, int);
1437 extern int      vs_aent_to_ace4(vsecattr_t *, vsecattr_t *, int, int);
1438 extern int      vs_ace4_to_aent(vsecattr_t *, vsecattr_t *, uid_t, gid_t,
1439     int, int);
1440 extern int      vs_ace4_to_acet(vsecattr_t *, vsecattr_t *, uid_t, gid_t,
1441     int);
1442 extern int      vs_acet_to_ace4(vsecattr_t *, vsecattr_t *, int);
1443 extern void     vs_acet_destroy(vsecattr_t *);
1444 extern void     vs_ace4_destroy(vsecattr_t *);
1445 extern void     vs_aent_destroy(vsecattr_t *);
1446 
     /*
      * fetch_referral() and build_symlink() return pointers (fs_locations4 *
      * and char *).  NOTE(review): who owns and frees the returned memory is
      * not visible from this header -- confirm at the definitions.
      */
1447 extern int      vn_find_nfs_record(vnode_t *, nvlist_t **, char **, char **);
1448 extern int      vn_is_nfs_reparse(vnode_t *, cred_t *);
1449 extern fs_locations4 *fetch_referral(vnode_t *, cred_t *);
1450 extern char     *build_symlink(vnode_t *, cred_t *, size_t *);
1451 
     /*
      * Used by CLNT_ISSPECIAL() below, which treats a non-zero return as
      * "id is one of the special stateids".
      */
1452 extern int      stateid4_cmp(stateid4 *, stateid4 *);
1453 
     /* Tables defined elsewhere; both are declared as unsized arrays. */
1454 extern vtype_t  nf4_to_vt[];
1455 
1456 extern struct nfs4_ntov_map nfs4_ntov_map[];
     /* NOTE(review): presumably the element count of nfs4_ntov_map -- confirm */
1457 extern uint_t nfs4_ntov_map_size;
1458 
     /*
      * rfsproccnt_v4_ptr is a kstat_named_t pointer; the remaining
      * declarations are vfsops/vnodeops pointers plus two const
      * fs_operation_def template arrays.
      */
1459 extern kstat_named_t    *rfsproccnt_v4_ptr;
1460 extern struct vfsops    *nfs4_vfsops;
1461 extern struct vnodeops  *nfs4_vnodeops;
1462 extern const struct     fs_operation_def nfs4_vnodeops_template[];
1463 extern vnodeops_t       *nfs4_trigger_vnodeops;
1464 extern const struct     fs_operation_def nfs4_trigger_vnodeops_template[];
1465 
     /* Both return uint_t: one takes a knetconfig, the other an svc_req. */
1466 extern uint_t nfs4_tsize(struct knetconfig *);
1467 extern uint_t rfs4_tsize(struct svc_req *);
1468 
     /*
      * xdr_inline_{decode,encode}_nfs_fh4(): the decode variant takes a
      * uint32_t * as its first argument while the encode variant takes a
      * uint32_t **.
      */
1469 extern bool_t   xdr_inline_decode_nfs_fh4(uint32_t *, nfs_fh4_fmt_t *,
1470                         uint32_t);
1471 extern bool_t   xdr_inline_encode_nfs_fh4(uint32_t **, uint32_t *,
1472                         nfs_fh4_fmt_t *);
1473 
     /*
      * Declared only when DEBUG is defined.
      * NOTE(review): the names suggest switches for pre-/post-operation
      * attribute handling -- confirm at the definitions.
      */
1474 #ifdef DEBUG
1475 extern int              rfs4_do_pre_op_attr;
1476 extern int              rfs4_do_post_op_attr;
1477 #endif
1478 
     /*
      * CLNT_ISSPECIAL(id) is non-zero when stateid4_cmp() returns non-zero
      * for id against either clnt_special0 or clnt_special1.  The argument
      * may be evaluated twice, so it must not have side effects.
      * NOTE(review): this relies on stateid4_cmp() returning non-zero on a
      * match (unlike memcmp-style comparators) -- confirm at its definition.
      */
1479 extern stateid4 clnt_special0;
1480 extern stateid4 clnt_special1;
1481 #define CLNT_ISSPECIAL(id) (stateid4_cmp(id, &clnt_special0) || \
1482                                 stateid4_cmp(id, &clnt_special1))
1483 
1484 /*
1485  * The NFS Version 4 service procedures.
      *
      * The per-zone hooks are not symmetric: rfs4_srv_zone_init() and
      * rfs4_srv_zone_fini() both take an nfs_globals_t *, whereas
      * rfs4_state_zone_init() takes an nfs4_srv_t * and
      * rfs4_state_zone_fini() takes no argument.
1486  */
1487 
1488 extern void     rfs4_do_server_start(int, int, int);
1489 extern void     rfs4_compound(COMPOUND4args *, COMPOUND4res *,
1490                         struct exportinfo *, struct svc_req *, cred_t *, int *);
1491 extern void     rfs4_compound_free(COMPOUND4res *);
1492 extern void     rfs4_compound_flagproc(COMPOUND4args *, int *);
1493 
1494 extern void     rfs4_srvrinit(void);
1495 extern void     rfs4_srvrfini(void);
1496 extern void     rfs4_srv_zone_init(nfs_globals_t *);
1497 extern void     rfs4_srv_zone_fini(nfs_globals_t *);
1498 extern void     rfs4_state_g_init(void);
1499 extern void     rfs4_state_zone_init(nfs4_srv_t *);
1500 extern void     rfs4_state_g_fini(void);
1501 extern void     rfs4_state_zone_fini(void);
1502 extern nfs4_srv_t *nfs4_get_srv(void);
1503 
1504 #endif
1505 #ifdef  __cplusplus
1506 }
1507 #endif
1508 
1509 #endif /* _NFS4_H */