1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30 
  31 /*
  32  * Copyright 2019 Nexenta Systems, Inc.
  33  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  34  */
  35 
  36 #include <sys/param.h>
  37 #include <sys/types.h>
  38 #include <sys/systm.h>
  39 #include <sys/cred.h>
  40 #include <sys/buf.h>
  41 #include <sys/vfs.h>
  42 #include <sys/vfs_opreg.h>
  43 #include <sys/vnode.h>
  44 #include <sys/uio.h>
  45 #include <sys/errno.h>
  46 #include <sys/sysmacros.h>
  47 #include <sys/statvfs.h>
  48 #include <sys/kmem.h>
  49 #include <sys/dirent.h>
  50 #include <sys/cmn_err.h>
  51 #include <sys/debug.h>
  52 #include <sys/systeminfo.h>
  53 #include <sys/flock.h>
  54 #include <sys/pathname.h>
  55 #include <sys/nbmlock.h>
  56 #include <sys/share.h>
  57 #include <sys/atomic.h>
  58 #include <sys/policy.h>
  59 #include <sys/fem.h>
  60 #include <sys/sdt.h>
  61 #include <sys/ddi.h>
  62 #include <sys/zone.h>
  63 #include <sys/kstat.h>
  64 
  65 #include <fs/fs_reparse.h>
  66 
  67 #include <rpc/types.h>
  68 #include <rpc/auth.h>
  69 #include <rpc/rpcsec_gss.h>
  70 #include <rpc/svc.h>
  71 
  72 #include <nfs/nfs.h>
  73 #include <nfs/nfssys.h>
  74 #include <nfs/export.h>
  75 #include <nfs/nfs_cmd.h>
  76 #include <nfs/lm.h>
  77 #include <nfs/nfs4.h>
  78 #include <nfs/nfs4_drc.h>
  79 
  80 #include <sys/strsubr.h>
  81 #include <sys/strsun.h>
  82 
  83 #include <inet/common.h>
  84 #include <inet/ip.h>
  85 #include <inet/ip6.h>
  86 
  87 #include <sys/tsol/label.h>
  88 #include <sys/tsol/tndb.h>
  89 
/*
 * Tunables
 *
 * rfs4_maxlock_tries and rfs4_lock_delay are file-scope variables seeded
 * from the RFS4_MAXLOCK_TRIES / RFS4_LOCK_DELAY defaults below.
 * rdma_svc_ops and nfs_loaned_buffers are defined outside this file.
 *
 * NOTE(review): rfs4_lock_delay is a clock_t while the default is described
 * as milliseconds -- confirm how the consumers convert the value.
 */
#define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define RFS4_LOCK_DELAY 10      /* Milliseconds */
static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */

/* File-local helper operating on a READ4 argument/result pair. */
static int rdma_setup_read_data4(READ4args *, READ4res *);
  99 
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * sp is a pointer to an object with a bits.chgseq member.  The member is
 * pre-incremented in place and the macro evaluates to the new value; sp is
 * evaluated exactly once.
 */
#define next_stateid(sp) (++(sp)->bits.chgseq)
 104 
/*
 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 *      This is used to return NFS4ERR_TOOSMALL when clients specify
 *      maxcount that isn't large enough to hold the smallest possible
 *      XDR encoded dirent.
 *
 *          sizeof cookie (8 bytes) +
 *          sizeof name_len (4 bytes) +
 *          sizeof smallest (padded) name (4 bytes) +
 *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 *          sizeof attrlist4_len (4 bytes) +
 *          sizeof next boolean (4 bytes)
 *          = 36 bytes
 *
 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 * the smallest possible entry4 (assumes no attrs requested).
 *      sizeof nfsstat4 (4 bytes) +
 *      sizeof verifier4 (8 bytes, NFS4_VERIFIER_SIZE) +
 *      sizeof entry4list bool (4 bytes) +
 *      sizeof entry4   (36 bytes, RFS4_MINLEN_ENTRY4) +
 *      sizeof eof bool  (4 bytes)
 *
 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 *      required for a given name length.  MAXNAMELEN is the maximum
 *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 *      macros are to allow for . and .. entries -- just a minor tweak to try
 *      and guarantee that buffer we give to VOP_READDIR will be large enough
 *      to hold ., .., and the largest possible solaris dirent64.
 */
#define RFS4_MINLEN_ENTRY4 36
#define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
#define RFS4_MINLEN_RDDIR_BUF \
        (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139 
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 *
 * The macro charges three XDR units for the fixed part (cookie plus name
 * length, per the layout above) and DIRENT64_NAMELEN((dp)->d_reclen) for
 * the name itself.
 */
#define DIRENT64_TO_DIRCOUNT(dp) \
        (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155 
/*
 * Zone key under which the per-zone nfs4_srv_t is stored; created in
 * rfs4_srvrinit() and looked up with zone_getspecific().
 */
zone_key_t      rfs4_zone_key;

/* Allocated with lm_alloc_sysidt() in rfs4_srvrinit(). */
static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */

/* Set from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t    nfs4_srv_caller_id;
/* Key handed to vsd_create() in rfs4_srvrinit(). */
uint_t          nfs4_srv_vkey = 0;
 162 
/*
 * Forward declarations.
 *
 * Every rfs4_op_*() handler shares the same signature
 * (argop, resop, RPC request, compound state) so that it can be stored in
 * rfsv4disptab[].  The *_free() routines release storage hung off the
 * corresponding result and take only the nfs_resop4; nullfree() is the
 * placeholder used for operations that have nothing to release.
 *
 * rfs4_op_readdir() and rfs4_free_reply() are referenced by the dispatch
 * table but are not declared here -- presumably they come from a header.
 */
void    rfs4_init_compound_state(struct compound_state *);

static void     nullfree(caddr_t);
static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_create_free(nfs_resop4 *resop);
static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
                    struct svc_req *, struct compound_state *);
static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
                    struct svc_req *, struct compound_state *);
static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_getattr_free(nfs_resop4 *);
static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_getfh_free(nfs_resop4 *);
static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     lock_denied_free(nfs_resop4 *);
static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
                    struct svc_req *req, struct compound_state *cs);
static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
                    struct svc_req *, struct compound_state *);
static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
                    struct svc_req *, struct compound_state *);
static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_read_free(nfs_resop4 *);
static void     rfs4_op_readdir_free(nfs_resop4 *resop);
static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_readlink_free(nfs_resop4 *);
static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
                    struct svc_req *, struct compound_state *);
static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
                    struct svc_req *, struct compound_state *);
static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
                    struct svc_req *req, struct compound_state *);
static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                    struct compound_state *);
static void     rfs4_op_secinfo_free(nfs_resop4 *);

static nfsstat4 check_open_access(uint32_t, struct compound_state *,
                    struct svc_req *);
nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 255 
 256 
/*
 * translation table for attrs
 *
 * Initialized by nfs4_ntov_table_init() and released by
 * nfs4_ntov_table_free().
 *
 * NOTE(review): the per-field notes below are inferred from the names and
 * types only; confirm against the code that fills the table in.
 */
struct nfs4_ntov_table {
        union nfs4_attr_u *na;          /* presumably the attr value array */
        uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably a per-entry attr map */
        int attrcnt;                    /* presumably number of entries used */
        bool_t vfsstat;                 /* presumably "vfs stats needed" flag */
};
 266 
/* Set-up and tear-down of a struct nfs4_ntov_table. */
static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
                    struct nfs4_svgetit_arg *sargp);

static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
                    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
                    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

/* Called from rfs4_do_server_start() on a warm start of a clustered server. */
static void     hanfsv4_failover(nfs4_srv_t *);

/*
 * FEM monitors for read and write delegations.  Built by fem_create() in
 * rfs4_srvrinit() from nfs4_rd_deleg_tmpl / nfs4_wr_deleg_tmpl and released
 * with fem_free() in rfs4_srvrfini().
 */
fem_t           *deleg_rdops;
fem_t           *deleg_wrops;
 279 
/*
 * NFS4 op dispatch table
 *
 * One struct rfsv4disp describes one NFSv4 operation.  The function
 * pointers are declared without a parameter list: the handlers stored in
 * dis_proc take (nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 * struct compound_state *), while the routines stored in dis_resfree do not
 * all share one prototype (most take nfs_resop4 *, nullfree takes caddr_t).
 */

struct rfsv4disp {
        void    (*dis_proc)();          /* proc to call */
        void    (*dis_resfree)();       /* frees space allocated by proc */
        int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
        int     op_type;                /* operation type, see below */
};
 290 
/*
 * operation types; used primarily for the per-exportinfo kstat implementation
 *
 * Each value is stored in the op_type member of struct rfsv4disp.
 */
#define NFS4_OP_NOFH    0       /* The operation does not operate with any */
                                /* particular filehandle; we cannot associate */
                                /* it with any exportinfo. */

#define NFS4_OP_CFH     1       /* The operation works with the current */
                                /* filehandle; we associate the operation */
                                /* with the exportinfo related to the current */
                                /* filehandle (as set before the operation is */
                                /* executed). */

#define NFS4_OP_SFH     2       /* The operation works with the saved */
                                /* filehandle; we associate the operation */
                                /* with the exportinfo related to the saved */
                                /* filehandle (as set before the operation is */
                                /* executed). */

#define NFS4_OP_POSTCFH 3       /* The operation ignores the current */
                                /* filehandle, but sets the new current */
                                /* filehandle instead; we associate the */
                                /* operation with the exportinfo related to */
                                /* the current filehandle as set after the */
                                /* operation is successfully executed.  Since */
                                /* we do not know the particular exportinfo */
                                /* (and thus the kstat) before the operation */
                                /* is done, there is no simple way to */
                                /* update some I/O kstat statistics related */
                                /* to kstat_queue(9F). */
 321 
/*
 * Dispatch table, indexed by NFSv4 operation number.  Each row names the
 * handler, the routine that frees the handler's result, the RPC flags and
 * the NFS4_OP_* filehandle class of the operation.  Slots 0-2 are not valid
 * operations and are routed to rfs4_op_illegal().
 */
static struct rfsv4disp rfsv4disptab[] = {
        /*
         * NFS VERSION 4
         */

        /* RFS_NULL = 0 */
        {rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

        /* UNUSED = 1 */
        {rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

        /* UNUSED = 2 */
        {rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

        /* OP_ACCESS = 3 */
        {rfs4_op_access, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_CLOSE = 4 */
        {rfs4_op_close, nullfree, 0, NFS4_OP_CFH},

        /* OP_COMMIT = 5 */
        {rfs4_op_commit, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_CREATE = 6 */
        {rfs4_op_create, nullfree, 0, NFS4_OP_CFH},

        /* OP_DELEGPURGE = 7 */
        {rfs4_op_delegpurge, nullfree, 0, NFS4_OP_NOFH},

        /* OP_DELEGRETURN = 8 */
        {rfs4_op_delegreturn, nullfree, 0, NFS4_OP_CFH},

        /* OP_GETATTR = 9 */
        {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_GETFH = 10 */
        {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL, NFS4_OP_CFH},

        /* OP_LINK = 11 */
        {rfs4_op_link, nullfree, 0, NFS4_OP_CFH},

        /* OP_LOCK = 12 */
        {rfs4_op_lock, lock_denied_free, 0, NFS4_OP_CFH},

        /* OP_LOCKT = 13 */
        {rfs4_op_lockt, lock_denied_free, 0, NFS4_OP_CFH},

        /* OP_LOCKU = 14 */
        {rfs4_op_locku, nullfree, 0, NFS4_OP_CFH},

        /* OP_LOOKUP = 15 */
        {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
            NFS4_OP_CFH},

        /* OP_LOOKUPP = 16 */
        {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
            NFS4_OP_CFH},

        /* OP_NVERIFY = 17 */
        {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_OPEN = 18 */
        {rfs4_op_open, rfs4_free_reply, 0, NFS4_OP_CFH},

        /* OP_OPENATTR = 19 */
        {rfs4_op_openattr, nullfree, 0, NFS4_OP_CFH},

        /* OP_OPEN_CONFIRM = 20 */
        {rfs4_op_open_confirm, nullfree, 0, NFS4_OP_CFH},

        /* OP_OPEN_DOWNGRADE = 21 */
        {rfs4_op_open_downgrade, nullfree, 0, NFS4_OP_CFH},

        /* OP_PUTFH = 22 */
        {rfs4_op_putfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

        /* OP_PUTPUBFH = 23 */
        {rfs4_op_putpubfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

        /* OP_PUTROOTFH = 24 */
        {rfs4_op_putrootfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

        /* OP_READ = 25 */
        {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_READDIR = 26 */
        {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_READLINK = 27 */
        {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_REMOVE = 28 */
        {rfs4_op_remove, nullfree, 0, NFS4_OP_CFH},

        /* OP_RENAME = 29 */
        {rfs4_op_rename, nullfree, 0, NFS4_OP_CFH},

        /* OP_RENEW = 30 */
        {rfs4_op_renew, nullfree, 0, NFS4_OP_NOFH},

        /* OP_RESTOREFH = 31 */
        {rfs4_op_restorefh, nullfree, RPC_ALL, NFS4_OP_SFH},

        /* OP_SAVEFH = 32 */
        {rfs4_op_savefh, nullfree, RPC_ALL, NFS4_OP_CFH},

        /* OP_SECINFO = 33 */
        {rfs4_op_secinfo, rfs4_op_secinfo_free, 0, NFS4_OP_CFH},

        /* OP_SETATTR = 34 */
        {rfs4_op_setattr, nullfree, 0, NFS4_OP_CFH},

        /* OP_SETCLIENTID = 35 */
        {rfs4_op_setclientid, nullfree, 0, NFS4_OP_NOFH},

        /* OP_SETCLIENTID_CONFIRM = 36 */
        {rfs4_op_setclientid_confirm, nullfree, 0, NFS4_OP_NOFH},

        /* OP_VERIFY = 37 */
        {rfs4_op_verify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_WRITE = 38 */
        {rfs4_op_write, nullfree, 0, NFS4_OP_CFH},

        /* OP_RELEASE_LOCKOWNER = 39 */
        {rfs4_op_release_lockowner, nullfree, 0, NFS4_OP_NOFH},
};

/* Number of rows in rfsv4disptab[]. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index used for illegal operations: one past the last valid table row.
 * It is not a valid subscript of rfsv4disptab[].
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 453 
 454 #ifdef DEBUG
 455 
 456 int             rfs4_fillone_debug = 0;
 457 int             rfs4_no_stub_access = 1;
 458 int             rfs4_rddir_debug = 0;
 459 
 460 static char    *rfs4_op_string[] = {
 461         "rfs4_op_null",
 462         "rfs4_op_1 unused",
 463         "rfs4_op_2 unused",
 464         "rfs4_op_access",
 465         "rfs4_op_close",
 466         "rfs4_op_commit",
 467         "rfs4_op_create",
 468         "rfs4_op_delegpurge",
 469         "rfs4_op_delegreturn",
 470         "rfs4_op_getattr",
 471         "rfs4_op_getfh",
 472         "rfs4_op_link",
 473         "rfs4_op_lock",
 474         "rfs4_op_lockt",
 475         "rfs4_op_locku",
 476         "rfs4_op_lookup",
 477         "rfs4_op_lookupp",
 478         "rfs4_op_nverify",
 479         "rfs4_op_open",
 480         "rfs4_op_openattr",
 481         "rfs4_op_open_confirm",
 482         "rfs4_op_open_downgrade",
 483         "rfs4_op_putfh",
 484         "rfs4_op_putpubfh",
 485         "rfs4_op_putrootfh",
 486         "rfs4_op_read",
 487         "rfs4_op_readdir",
 488         "rfs4_op_readlink",
 489         "rfs4_op_remove",
 490         "rfs4_op_rename",
 491         "rfs4_op_renew",
 492         "rfs4_op_restorefh",
 493         "rfs4_op_savefh",
 494         "rfs4_op_secinfo",
 495         "rfs4_op_setattr",
 496         "rfs4_op_setclientid",
 497         "rfs4_op_setclient_confirm",
 498         "rfs4_op_verify",
 499         "rfs4_op_write",
 500         "rfs4_op_release_lockowner",
 501         "rfs4_op_illegal"
 502 };
 503 #endif
 504 
void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);

/* Bounded string copy; used below to duplicate DSS path names. */
extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);

extern void     rfs4_free_fs_locations4(fs_locations4 *);

/*
 * Step from one struct dirent64 to the record that follows it in the same
 * buffer, i.e. d_reclen bytes further on.  Any earlier definition of nextdp
 * is discarded first.
 */
#ifdef  nextdp
#undef nextdp
#endif
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 515 
/*
 * FEM templates for delegated files.  Each table pairs a vnode operation
 * name with the deleg_* routine installed for it and ends with a NULL/NULL
 * terminator.  rfs4_srvrinit() hands them to fem_create() to build
 * deleg_rdops and deleg_wrops respectively.
 *
 * Read-delegation template: open, write, setattr, rwlock, space,
 * setsecattr and vnevent.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
        VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
        NULL,                   NULL
};
/*
 * Write-delegation template: the same operations as the read template plus
 * read.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
        VOPNAME_READ,           { .femop_read = deleg_wr_read },
        VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
        NULL,                   NULL
};
 537 
 538 /* ARGSUSED */
 539 static void *
 540 rfs4_zone_init(zoneid_t zoneid)
 541 {
 542         nfs4_srv_t *nsrv4;
 543         timespec32_t verf;
 544 
 545         nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 546 
 547         /*
 548          * The following algorithm attempts to find a unique verifier
 549          * to be used as the write verifier returned from the server
 550          * to the client.  It is important that this verifier change
 551          * whenever the server reboots.  Of secondary importance, it
 552          * is important for the verifier to be unique between two
 553          * different servers.
 554          *
 555          * Thus, an attempt is made to use the system hostid and the
 556          * current time in seconds when the nfssrv kernel module is
 557          * loaded.  It is assumed that an NFS server will not be able
 558          * to boot and then to reboot in less than a second.  If the
 559          * hostid has not been set, then the current high resolution
 560          * time is used.  This will ensure different verifiers each
 561          * time the server reboots and minimize the chances that two
 562          * different servers will have the same verifier.
 563          * XXX - this is broken on LP64 kernels.
 564          */
 565         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 566         if (verf.tv_sec != 0) {
 567                 verf.tv_nsec = gethrestime_sec();
 568         } else {
 569                 timespec_t tverf;
 570 
 571                 gethrestime(&tverf);
 572                 verf.tv_sec = (time_t)tverf.tv_sec;
 573                 verf.tv_nsec = tverf.tv_nsec;
 574         }
 575         nsrv4->write4verf = *(uint64_t *)&verf;
 576 
 577         /* Used to manage create/destroy of server state */
 578         nsrv4->nfs4_server_state = NULL;
 579         nsrv4->nfs4_cur_servinst = NULL;
 580         nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 581         mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 582         mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 583         mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 584         rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 585 
 586         return (nsrv4);
 587 }
 588 
 589 /* ARGSUSED */
 590 static void
 591 rfs4_zone_fini(zoneid_t zoneid, void *data)
 592 {
 593         nfs4_srv_t *nsrv4 = data;
 594 
 595         mutex_destroy(&nsrv4->deleg_lock);
 596         mutex_destroy(&nsrv4->state_lock);
 597         mutex_destroy(&nsrv4->servinst_lock);
 598         rw_destroy(&nsrv4->deleg_policy_lock);
 599 
 600         kmem_free(nsrv4, sizeof (*nsrv4));
 601 }
 602 
 603 void
 604 rfs4_srvrinit(void)
 605 {
 606         extern void rfs4_attr_init();
 607 
 608         zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
 609 
 610         rfs4_attr_init();
 611 
 612 
 613         if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 614                 rfs4_disable_delegation();
 615         } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 616             &deleg_wrops) != 0) {
 617                 rfs4_disable_delegation();
 618                 fem_free(deleg_rdops);
 619         }
 620 
 621         nfs4_srv_caller_id = fs_new_caller_id();
 622         lockt_sysid = lm_alloc_sysidt();
 623         vsd_create(&nfs4_srv_vkey, NULL);
 624         rfs4_state_g_init();
 625 }
 626 
 627 void
 628 rfs4_srvrfini(void)
 629 {
 630         if (lockt_sysid != LM_NOSYSID) {
 631                 lm_free_sysidt(lockt_sysid);
 632                 lockt_sysid = LM_NOSYSID;
 633         }
 634 
 635         rfs4_state_g_fini();
 636 
 637         fem_free(deleg_rdops);
 638         fem_free(deleg_wrops);
 639 
 640         (void) zone_key_delete(rfs4_zone_key);
 641 }
 642 
 643 void
 644 rfs4_do_server_start(int server_upordown,
 645     int srv_delegation, int cluster_booted)
 646 {
 647         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
 648 
 649         /* Is this a warm start? */
 650         if (server_upordown == NFS_SERVER_QUIESCED) {
 651                 cmn_err(CE_NOTE, "nfs4_srv: "
 652                     "server was previously quiesced; "
 653                     "existing NFSv4 state will be re-used");
 654 
 655                 /*
 656                  * HA-NFSv4: this is also the signal
 657                  * that a Resource Group failover has
 658                  * occurred.
 659                  */
 660                 if (cluster_booted)
 661                         hanfsv4_failover(nsrv4);
 662         } else {
 663                 /* Cold start */
 664                 nsrv4->rfs4_start_time = 0;
 665                 rfs4_state_zone_init(nsrv4);
 666                 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 667                     nfs4_drc_hash);
 668         }
 669 
 670         /* Check if delegation is to be enabled */
 671         if (srv_delegation != FALSE)
 672                 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 673 }
 674 
 675 void
 676 rfs4_init_compound_state(struct compound_state *cs)
 677 {
 678         bzero(cs, sizeof (*cs));
 679         cs->cont = TRUE;
 680         cs->access = CS_ACCESS_DENIED;
 681         cs->deleg = FALSE;
 682         cs->mandlock = FALSE;
 683         cs->fh.nfs_fh4_val = cs->fhbuf;
 684         cs->statusp = NULL;
 685 }
 686 
 687 void
 688 rfs4_grace_start(rfs4_servinst_t *sip)
 689 {
 690         rw_enter(&sip->rwlock, RW_WRITER);
 691         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 692         sip->grace_period = rfs4_grace_period;
 693         rw_exit(&sip->rwlock);
 694 }
 695 
 696 /*
 697  * returns true if the instance's grace period has never been started
 698  */
 699 int
 700 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 701 {
 702         time_t start_time;
 703 
 704         rw_enter(&sip->rwlock, RW_READER);
 705         start_time = sip->start_time;
 706         rw_exit(&sip->rwlock);
 707 
 708         return (start_time == 0);
 709 }
 710 
 711 /*
 712  * Indicates if server instance is within the
 713  * grace period.
 714  */
 715 int
 716 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 717 {
 718         time_t grace_expiry;
 719 
 720         rw_enter(&sip->rwlock, RW_READER);
 721         grace_expiry = sip->start_time + sip->grace_period;
 722         rw_exit(&sip->rwlock);
 723 
 724         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 725 }
 726 
 727 int
 728 rfs4_clnt_in_grace(rfs4_client_t *cp)
 729 {
 730         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 731 
 732         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 733 }
 734 
 735 /*
 736  * reset all currently active grace periods
 737  */
 738 void
 739 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 740 {
 741         rfs4_servinst_t *sip;
 742 
 743         mutex_enter(&nsrv4->servinst_lock);
 744         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 745                 if (rfs4_servinst_in_grace(sip))
 746                         rfs4_grace_start(sip);
 747         mutex_exit(&nsrv4->servinst_lock);
 748 }
 749 
 750 /*
 751  * start any new instances' grace periods
 752  */
 753 void
 754 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 755 {
 756         rfs4_servinst_t *sip;
 757 
 758         mutex_enter(&nsrv4->servinst_lock);
 759         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 760                 if (rfs4_servinst_grace_new(sip))
 761                         rfs4_grace_start(sip);
 762         mutex_exit(&nsrv4->servinst_lock);
 763 }
 764 
 765 static rfs4_dss_path_t *
 766 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 767     char *path, unsigned index)
 768 {
 769         size_t len;
 770         rfs4_dss_path_t *dss_path;
 771 
 772         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 773 
 774         /*
 775          * Take a copy of the string, since the original may be overwritten.
 776          * Sadly, no strdup() in the kernel.
 777          */
 778         /* allow for NUL */
 779         len = strlen(path) + 1;
 780         dss_path->path = kmem_alloc(len, KM_SLEEP);
 781         (void) strlcpy(dss_path->path, path, len);
 782 
 783         /* associate with servinst */
 784         dss_path->sip = sip;
 785         dss_path->index = index;
 786 
 787         /*
 788          * Add to list of served paths.
 789          * No locking required, as we're only ever called at startup.
 790          */
 791         if (nsrv4->dss_pathlist == NULL) {
 792                 /* this is the first dss_path_t */
 793 
 794                 /* needed for insque/remque */
 795                 dss_path->next = dss_path->prev = dss_path;
 796 
 797                 nsrv4->dss_pathlist = dss_path;
 798         } else {
 799                 insque(dss_path, nsrv4->dss_pathlist);
 800         }
 801 
 802         return (dss_path);
 803 }
 804 
 805 /*
 806  * Create a new server instance, and make it the currently active instance.
 807  * Note that starting the grace period too early will reduce the clients'
 808  * recovery window.
 809  */
 810 void
 811 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 812     int dss_npaths, char **dss_paths)
 813 {
 814         unsigned i;
 815         rfs4_servinst_t *sip;
 816         rfs4_oldstate_t *oldstate;
 817 
 818         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 819         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 820 
 821         sip->start_time = (time_t)0;
 822         sip->grace_period = (time_t)0;
 823         sip->next = NULL;
 824         sip->prev = NULL;
 825 
 826         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 827         /*
 828          * This initial dummy entry is required to setup for insque/remque.
 829          * It must be skipped over whenever the list is traversed.
 830          */
 831         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 832         /* insque/remque require initial list entry to be self-terminated */
 833         oldstate->next = oldstate;
 834         oldstate->prev = oldstate;
 835         sip->oldstate = oldstate;
 836 
 837 
 838         sip->dss_npaths = dss_npaths;
 839         sip->dss_paths = kmem_alloc(dss_npaths *
 840             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 841 
 842         for (i = 0; i < dss_npaths; i++) {
 843                 /* CSTYLED */
 844                 sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 845         }
 846 
 847         mutex_enter(&nsrv4->servinst_lock);
 848         if (nsrv4->nfs4_cur_servinst != NULL) {
 849                 /* add to linked list */
 850                 sip->prev = nsrv4->nfs4_cur_servinst;
 851                 nsrv4->nfs4_cur_servinst->next = sip;
 852         }
 853         if (start_grace)
 854                 rfs4_grace_start(sip);
 855         /* make the new instance "current" */
 856         nsrv4->nfs4_cur_servinst = sip;
 857 
 858         mutex_exit(&nsrv4->servinst_lock);
 859 }
 860 
 861 /*
 862  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 863  * all instances directly.
 864  */
 865 void
 866 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 867 {
 868         rfs4_servinst_t *sip, *prev, *current;
 869 #ifdef DEBUG
 870         int n = 0;
 871 #endif
 872 
 873         mutex_enter(&nsrv4->servinst_lock);
 874         ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 875         current = nsrv4->nfs4_cur_servinst;
 876         nsrv4->nfs4_cur_servinst = NULL;
 877         for (sip = current; sip != NULL; sip = prev) {
 878                 prev = sip->prev;
 879                 rw_destroy(&sip->rwlock);
 880                 if (sip->oldstate)
 881                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 882                 if (sip->dss_paths)
 883                         kmem_free(sip->dss_paths,
 884                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 885                 kmem_free(sip, sizeof (rfs4_servinst_t));
 886 #ifdef DEBUG
 887                 n++;
 888 #endif
 889         }
 890         mutex_exit(&nsrv4->servinst_lock);
 891 }
 892 
 893 /*
 894  * Assign the current server instance to a client_t.
 895  * Should be called with cp->rc_dbe held.
 896  */
 897 void
 898 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 899     rfs4_servinst_t *sip)
 900 {
 901         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 902 
 903         /*
 904          * The lock ensures that if the current instance is in the process
 905          * of changing, we will see the new one.
 906          */
 907         mutex_enter(&nsrv4->servinst_lock);
 908         cp->rc_server_instance = sip;
 909         mutex_exit(&nsrv4->servinst_lock);
 910 }
 911 
 912 rfs4_servinst_t *
 913 rfs4_servinst(rfs4_client_t *cp)
 914 {
 915         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 916 
 917         return (cp->rc_server_instance);
 918 }
 919 
 920 /* ARGSUSED */
 921 static void
 922 nullfree(caddr_t resop)
 923 {
 924 }
 925 
 926 /*
 927  * This is a fall-through for invalid or not implemented (yet) ops
 928  */
 929 /* ARGSUSED */
 930 static void
 931 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 932     struct compound_state *cs)
 933 {
 934         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 935 }
 936 
 937 /*
 938  * Check if the security flavor, nfsnum, is in the flavor_list.
 939  */
 940 bool_t
 941 in_flavor_list(int nfsnum, int *flavor_list, int count)
 942 {
 943         int i;
 944 
 945         for (i = 0; i < count; i++) {
 946                 if (nfsnum == flavor_list[i])
 947                         return (TRUE);
 948         }
 949         return (FALSE);
 950 }
 951 
 952 /*
 953  * Used by rfs4_op_secinfo to get the security information from the
 954  * export structure associated with the component.
 955  */
 956 /* ARGSUSED */
 957 static nfsstat4
 958 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 959 {
 960         int error, different_export = 0;
 961         vnode_t *dvp, *vp;
 962         struct exportinfo *exi = NULL;
 963         fid_t fid;
 964         uint_t count, i;
 965         secinfo4 *resok_val;
 966         struct secinfo *secp;
 967         seconfig_t *si;
 968         bool_t did_traverse = FALSE;
 969         int dotdot, walk;
 970         nfs_export_t *ne = nfs_get_export();
 971 
 972         dvp = cs->vp;
 973         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 974 
 975         /*
 976          * If dotdotting, then need to check whether it's above the
 977          * root of a filesystem, or above an export point.
 978          */
 979         if (dotdot) {
 980 
 981                 /*
 982                  * If dotdotting at the root of a filesystem, then
 983                  * need to traverse back to the mounted-on filesystem
 984                  * and do the dotdot lookup there.
 985                  */
 986                 if (cs->vp->v_flag & VROOT) {
 987 
 988                         /*
 989                          * If at the system root, then can
 990                          * go up no further.
 991                          */
 992                         if (VN_CMP(dvp, ZONE_ROOTVP()))
 993                                 return (puterrno4(ENOENT));
 994 
 995                         /*
 996                          * Traverse back to the mounted-on filesystem
 997                          */
 998                         dvp = untraverse(cs->vp);
 999 
1000                         /*
1001                          * Set the different_export flag so we remember
1002                          * to pick up a new exportinfo entry for
1003                          * this new filesystem.
1004                          */
1005                         different_export = 1;
1006                 } else {
1007 
1008                         /*
1009                          * If dotdotting above an export point then set
1010                          * the different_export to get new export info.
1011                          */
1012                         different_export = nfs_exported(cs->exi, cs->vp);
1013                 }
1014         }
1015 
1016         /*
1017          * Get the vnode for the component "nm".
1018          */
1019         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1020             NULL, NULL, NULL);
1021         if (error)
1022                 return (puterrno4(error));
1023 
1024         /*
1025          * If the vnode is in a pseudo filesystem, or if the security flavor
1026          * used in the request is valid but not an explicitly shared flavor,
1027          * or the access bit indicates that this is a limited access,
1028          * check whether this vnode is visible.
1029          */
1030         if (!different_export &&
1031             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1032             cs->access & CS_ACCESS_LIMITED)) {
1033                 if (! nfs_visible(cs->exi, vp, &different_export)) {
1034                         VN_RELE(vp);
1035                         return (puterrno4(ENOENT));
1036                 }
1037         }
1038 
1039         /*
1040          * If it's a mountpoint, then traverse it.
1041          */
1042         if (vn_ismntpt(vp)) {
1043                 if ((error = traverse(&vp)) != 0) {
1044                         VN_RELE(vp);
1045                         return (puterrno4(error));
1046                 }
1047                 /* remember that we had to traverse mountpoint */
1048                 did_traverse = TRUE;
1049                 different_export = 1;
1050         } else if (vp->v_vfsp != dvp->v_vfsp) {
1051                 /*
1052                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1053                  * then vp is probably an LOFS object.  We don't need the
1054                  * realvp, we just need to know that we might have crossed
1055                  * a server fs boundary and need to call checkexport4.
1056                  * (LOFS lookup hides server fs mountpoints, and actually calls
1057                  * traverse)
1058                  */
1059                 different_export = 1;
1060         }
1061 
1062         /*
1063          * Get the export information for it.
1064          */
1065         if (different_export) {
1066 
1067                 bzero(&fid, sizeof (fid));
1068                 fid.fid_len = MAXFIDSZ;
1069                 error = vop_fid_pseudo(vp, &fid);
1070                 if (error) {
1071                         VN_RELE(vp);
1072                         return (puterrno4(error));
1073                 }
1074 
1075                 if (dotdot)
1076                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1077                 else
1078                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1079 
1080                 if (exi == NULL) {
1081                         if (did_traverse == TRUE) {
1082                                 /*
1083                                  * If this vnode is a mounted-on vnode,
1084                                  * but the mounted-on file system is not
1085                                  * exported, send back the secinfo for
1086                                  * the exported node that the mounted-on
1087                                  * vnode lives in.
1088                                  */
1089                                 exi = cs->exi;
1090                         } else {
1091                                 VN_RELE(vp);
1092                                 return (puterrno4(EACCES));
1093                         }
1094                 }
1095         } else {
1096                 exi = cs->exi;
1097         }
1098         ASSERT(exi != NULL);
1099 
1100 
1101         /*
1102          * Create the secinfo result based on the security information
1103          * from the exportinfo structure (exi).
1104          *
1105          * Return all flavors for a pseudo node.
1106          * For a real export node, return the flavor that the client
1107          * has access with.
1108          */
1109         ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1110         if (PSEUDO(exi)) {
1111                 count = exi->exi_export.ex_seccnt; /* total sec count */
1112                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1113                 secp = exi->exi_export.ex_secinfo;
1114 
1115                 for (i = 0; i < count; i++) {
1116                         si = &secp[i].s_secinfo;
1117                         resok_val[i].flavor = si->sc_rpcnum;
1118                         if (resok_val[i].flavor == RPCSEC_GSS) {
1119                                 rpcsec_gss_info *info;
1120 
1121                                 info = &resok_val[i].flavor_info;
1122                                 info->qop = si->sc_qop;
1123                                 info->service = (rpc_gss_svc_t)si->sc_service;
1124 
1125                                 /* get oid opaque data */
1126                                 info->oid.sec_oid4_len =
1127                                     si->sc_gss_mech_type->length;
1128                                 info->oid.sec_oid4_val = kmem_alloc(
1129                                     si->sc_gss_mech_type->length, KM_SLEEP);
1130                                 bcopy(
1131                                     si->sc_gss_mech_type->elements,
1132                                     info->oid.sec_oid4_val,
1133                                     info->oid.sec_oid4_len);
1134                         }
1135                 }
1136                 resp->SECINFO4resok_len = count;
1137                 resp->SECINFO4resok_val = resok_val;
1138         } else {
1139                 int ret_cnt = 0, k = 0;
1140                 int *flavor_list;
1141 
1142                 count = exi->exi_export.ex_seccnt; /* total sec count */
1143                 secp = exi->exi_export.ex_secinfo;
1144 
1145                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1146                 /* find out which flavors to return */
1147                 for (i = 0; i < count; i ++) {
1148                         int access, flavor, perm;
1149 
1150                         flavor = secp[i].s_secinfo.sc_nfsnum;
1151                         perm = secp[i].s_flags;
1152 
1153                         access = nfsauth4_secinfo_access(exi, cs->req,
1154                             flavor, perm, cs->basecr);
1155 
1156                         if (! (access & NFSAUTH_DENIED) &&
1157                             ! (access & NFSAUTH_WRONGSEC)) {
1158                                 flavor_list[ret_cnt] = flavor;
1159                                 ret_cnt++;
1160                         }
1161                 }
1162 
1163                 /* Create the returning SECINFO value */
1164                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1165 
1166                 for (i = 0; i < count; i++) {
1167                         /*
1168                          * If the flavor is in the flavor list,
1169                          * fill in resok_val.
1170                          */
1171                         si = &secp[i].s_secinfo;
1172                         if (in_flavor_list(si->sc_nfsnum,
1173                             flavor_list, ret_cnt)) {
1174                                 resok_val[k].flavor = si->sc_rpcnum;
1175                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1176                                         rpcsec_gss_info *info;
1177 
1178                                         info = &resok_val[k].flavor_info;
1179                                         info->qop = si->sc_qop;
1180                                         info->service = (rpc_gss_svc_t)
1181                                             si->sc_service;
1182 
1183                                         /* get oid opaque data */
1184                                         info->oid.sec_oid4_len =
1185                                             si->sc_gss_mech_type->length;
1186                                         info->oid.sec_oid4_val = kmem_alloc(
1187                                             si->sc_gss_mech_type->length,
1188                                             KM_SLEEP);
1189                                         bcopy(si->sc_gss_mech_type->elements,
1190                                             info->oid.sec_oid4_val,
1191                                             info->oid.sec_oid4_len);
1192                                 }
1193                                 k++;
1194                         }
1195                         if (k >= ret_cnt)
1196                                 break;
1197                 }
1198                 resp->SECINFO4resok_len = ret_cnt;
1199                 resp->SECINFO4resok_val = resok_val;
1200                 kmem_free(flavor_list, count * sizeof (int));
1201         }
1202 
1203         VN_RELE(vp);
1204         return (NFS4_OK);
1205 }
1206 
1207 /*
1208  * SECINFO (Operation 33): Obtain required security information on
1209  * the component name in the format of (security-mechanism-oid, qop, service)
1210  * triplets.
1211  */
1212 /* ARGSUSED */
1213 static void
1214 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1215     struct compound_state *cs)
1216 {
1217         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1218         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1219         utf8string *utfnm = &args->name;
1220         uint_t len;
1221         char *nm;
1222         struct sockaddr *ca;
1223         char *name = NULL;
1224         nfsstat4 status = NFS4_OK;
1225 
1226         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1227             SECINFO4args *, args);
1228 
1229         /*
1230          * Current file handle (cfh) should have been set before getting
1231          * into this function. If not, return error.
1232          */
1233         if (cs->vp == NULL) {
1234                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1235                 goto out;
1236         }
1237 
1238         if (cs->vp->v_type != VDIR) {
1239                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1240                 goto out;
1241         }
1242 
1243         /*
1244          * Verify the component name. If failed, error out, but
1245          * do not error out if the component name is a "..".
1246          * SECINFO will return its parents secinfo data for SECINFO "..".
1247          */
1248         status = utf8_dir_verify(utfnm);
1249         if (status != NFS4_OK) {
1250                 if (utfnm->utf8string_len != 2 ||
1251                     utfnm->utf8string_val[0] != '.' ||
1252                     utfnm->utf8string_val[1] != '.') {
1253                         *cs->statusp = resp->status = status;
1254                         goto out;
1255                 }
1256         }
1257 
1258         nm = utf8_to_str(utfnm, &len, NULL);
1259         if (nm == NULL) {
1260                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1261                 goto out;
1262         }
1263 
1264         if (len > MAXNAMELEN) {
1265                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1266                 kmem_free(nm, len);
1267                 goto out;
1268         }
1269 
1270         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1271         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1272             MAXPATHLEN  + 1);
1273 
1274         if (name == NULL) {
1275                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1276                 kmem_free(nm, len);
1277                 goto out;
1278         }
1279 
1280 
1281         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1282 
1283         if (name != nm)
1284                 kmem_free(name, MAXPATHLEN + 1);
1285         kmem_free(nm, len);
1286 
1287 out:
1288         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1289             SECINFO4res *, resp);
1290 }
1291 
1292 /*
1293  * Free SECINFO result.
1294  */
1295 /* ARGSUSED */
1296 static void
1297 rfs4_op_secinfo_free(nfs_resop4 *resop)
1298 {
1299         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1300         int count, i;
1301         secinfo4 *resok_val;
1302 
1303         /* If this is not an Ok result, nothing to free. */
1304         if (resp->status != NFS4_OK) {
1305                 return;
1306         }
1307 
1308         count = resp->SECINFO4resok_len;
1309         resok_val = resp->SECINFO4resok_val;
1310 
1311         for (i = 0; i < count; i++) {
1312                 if (resok_val[i].flavor == RPCSEC_GSS) {
1313                         rpcsec_gss_info *info;
1314 
1315                         info = &resok_val[i].flavor_info;
1316                         kmem_free(info->oid.sec_oid4_val,
1317                             info->oid.sec_oid4_len);
1318                 }
1319         }
1320         kmem_free(resok_val, count * sizeof (secinfo4));
1321         resp->SECINFO4resok_len = 0;
1322         resp->SECINFO4resok_val = NULL;
1323 }
1324 
1325 /* ARGSUSED */
1326 static void
1327 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1328     struct compound_state *cs)
1329 {
1330         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1331         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1332         int error;
1333         vnode_t *vp;
1334         struct vattr va;
1335         int checkwriteperm;
1336         cred_t *cr = cs->cr;
1337         bslabel_t *clabel, *slabel;
1338         ts_label_t *tslabel;
1339         boolean_t admin_low_client;
1340 
1341         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1342             ACCESS4args *, args);
1343 
1344 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1345         if (cs->access == CS_ACCESS_DENIED) {
1346                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1347                 goto out;
1348         }
1349 #endif
1350         if (cs->vp == NULL) {
1351                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1352                 goto out;
1353         }
1354 
1355         ASSERT(cr != NULL);
1356 
1357         vp = cs->vp;
1358 
1359         /*
1360          * If the file system is exported read only, it is not appropriate
1361          * to check write permissions for regular files and directories.
1362          * Special files are interpreted by the client, so the underlying
1363          * permissions are sent back to the client for interpretation.
1364          */
1365         if (rdonly4(req, cs) &&
1366             (vp->v_type == VREG || vp->v_type == VDIR))
1367                 checkwriteperm = 0;
1368         else
1369                 checkwriteperm = 1;
1370 
1371         /*
1372          * XXX
1373          * We need the mode so that we can correctly determine access
1374          * permissions relative to a mandatory lock file.  Access to
1375          * mandatory lock files is denied on the server, so it might
1376          * as well be reflected to the server during the open.
1377          */
1378         va.va_mask = AT_MODE;
1379         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1380         if (error) {
1381                 *cs->statusp = resp->status = puterrno4(error);
1382                 goto out;
1383         }
1384         resp->access = 0;
1385         resp->supported = 0;
1386 
1387         if (is_system_labeled()) {
1388                 ASSERT(req->rq_label != NULL);
1389                 clabel = req->rq_label;
1390                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1391                     "got client label from request(1)",
1392                     struct svc_req *, req);
1393                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1394                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1395                                 *cs->statusp = resp->status = puterrno4(EACCES);
1396                                 goto out;
1397                         }
1398                         slabel = label2bslabel(tslabel);
1399                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1400                             char *, "got server label(1) for vp(2)",
1401                             bslabel_t *, slabel, vnode_t *, vp);
1402 
1403                         admin_low_client = B_FALSE;
1404                 } else
1405                         admin_low_client = B_TRUE;
1406         }
1407 
1408         if (args->access & ACCESS4_READ) {
1409                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1410                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1411                     (!is_system_labeled() || admin_low_client ||
1412                     bldominates(clabel, slabel)))
1413                         resp->access |= ACCESS4_READ;
1414                 resp->supported |= ACCESS4_READ;
1415         }
1416         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1417                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1418                 if (!error && (!is_system_labeled() || admin_low_client ||
1419                     bldominates(clabel, slabel)))
1420                         resp->access |= ACCESS4_LOOKUP;
1421                 resp->supported |= ACCESS4_LOOKUP;
1422         }
1423         if (checkwriteperm &&
1424             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1425                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1426                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1427                     (!is_system_labeled() || admin_low_client ||
1428                     blequal(clabel, slabel)))
1429                         resp->access |=
1430                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1431                 resp->supported |=
1432                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1433         }
1434 
1435         if (checkwriteperm &&
1436             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1437                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1438                 if (!error && (!is_system_labeled() || admin_low_client ||
1439                     blequal(clabel, slabel)))
1440                         resp->access |= ACCESS4_DELETE;
1441                 resp->supported |= ACCESS4_DELETE;
1442         }
1443         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1444                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1445                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1446                     (!is_system_labeled() || admin_low_client ||
1447                     bldominates(clabel, slabel)))
1448                         resp->access |= ACCESS4_EXECUTE;
1449                 resp->supported |= ACCESS4_EXECUTE;
1450         }
1451 
1452         if (is_system_labeled() && !admin_low_client)
1453                 label_rele(tslabel);
1454 
1455         *cs->statusp = resp->status = NFS4_OK;
1456 out:
1457         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1458             ACCESS4res *, resp);
1459 }
1460 
1461 /* ARGSUSED */
1462 static void
1463 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1464     struct compound_state *cs)
1465 {
1466         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1467         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1468         int error;
1469         vnode_t *vp = cs->vp;
1470         cred_t *cr = cs->cr;
1471         vattr_t va;
1472         nfs4_srv_t *nsrv4;
1473 
1474         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1475             COMMIT4args *, args);
1476 
1477         if (vp == NULL) {
1478                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1479                 goto out;
1480         }
1481         if (cs->access == CS_ACCESS_DENIED) {
1482                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1483                 goto out;
1484         }
1485 
1486         if (args->offset + args->count < args->offset) {
1487                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1488                 goto out;
1489         }
1490 
1491         va.va_mask = AT_UID;
1492         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1493 
1494         /*
1495          * If we can't get the attributes, then we can't do the
1496          * right access checking.  So, we'll fail the request.
1497          */
1498         if (error) {
1499                 *cs->statusp = resp->status = puterrno4(error);
1500                 goto out;
1501         }
1502         if (rdonly4(req, cs)) {
1503                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1504                 goto out;
1505         }
1506 
1507         if (vp->v_type != VREG) {
1508                 if (vp->v_type == VDIR)
1509                         resp->status = NFS4ERR_ISDIR;
1510                 else
1511                         resp->status = NFS4ERR_INVAL;
1512                 *cs->statusp = resp->status;
1513                 goto out;
1514         }
1515 
1516         if (crgetuid(cr) != va.va_uid &&
1517             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1518                 *cs->statusp = resp->status = puterrno4(error);
1519                 goto out;
1520         }
1521 
1522         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1523 
1524         if (error) {
1525                 *cs->statusp = resp->status = puterrno4(error);
1526                 goto out;
1527         }
1528 
1529         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1530         *cs->statusp = resp->status = NFS4_OK;
1531         resp->writeverf = nsrv4->write4verf;
1532 out:
1533         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1534             COMMIT4res *, resp);
1535 }
1536 
1537 /*
1538  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1539  * was completed. It does the nfsv4 create for special files.
1540  */
1541 /* ARGSUSED */
1542 static vnode_t *
1543 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1544     struct compound_state *cs, vattr_t *vap, char *nm)
1545 {
1546         int error;
1547         cred_t *cr = cs->cr;
1548         vnode_t *dvp = cs->vp;
1549         vnode_t *vp = NULL;
1550         int mode;
1551         enum vcexcl excl;
1552 
1553         switch (args->type) {
1554         case NF4CHR:
1555         case NF4BLK:
1556                 if (secpolicy_sys_devices(cr) != 0) {
1557                         *cs->statusp = resp->status = NFS4ERR_PERM;
1558                         return (NULL);
1559                 }
1560                 if (args->type == NF4CHR)
1561                         vap->va_type = VCHR;
1562                 else
1563                         vap->va_type = VBLK;
1564                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1565                     args->ftype4_u.devdata.specdata2);
1566                 vap->va_mask |= AT_RDEV;
1567                 break;
1568         case NF4SOCK:
1569                 vap->va_type = VSOCK;
1570                 break;
1571         case NF4FIFO:
1572                 vap->va_type = VFIFO;
1573                 break;
1574         default:
1575                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1576                 return (NULL);
1577         }
1578 
1579         /*
1580          * Must specify the mode.
1581          */
1582         if (!(vap->va_mask & AT_MODE)) {
1583                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1584                 return (NULL);
1585         }
1586 
1587         excl = EXCL;
1588 
1589         mode = 0;
1590 
1591         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1592         if (error) {
1593                 *cs->statusp = resp->status = puterrno4(error);
1594                 return (NULL);
1595         }
1596         return (vp);
1597 }
1598 
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * Handles the CREATE operation for NF4DIR, NF4LNK, NF4BLK, NF4CHR,
 * NF4SOCK and NF4FIFO objects inside the current directory (cs->vp);
 * any other type is rejected with NFS4ERR_BADTYPE.
 *
 *      argop   operation arguments; the opcreate member is used
 *      resop   operation result; the opcreate member is filled in
 *      req     RPC request, used for the read-only check and to find
 *              the caller's address for name conversion
 *      cs      compound state; supplies the directory vnode, the
 *              credentials and the export, and receives the new vnode
 *
 * On success the new object's vnode replaces cs->vp (the hold on the
 * previous cs->vp is released), cs->fh is rebuilt by makefh4(),
 * resp->cinfo carries the directory's before/after change values plus
 * the atomic flag, and resp->attrset holds the requested createattrs
 * bits that are reported as set.
 *
 * Every failure stores the same status in both *cs->statusp and
 * resp->status.  Failures detected before the filehandle is built also
 * reset resp->attrset to 0.  All paths leave through the "out" label so
 * that the op__create__done probe always fires.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
        CREATE4args *args = &argop->nfs_argop4_u.opcreate;
        CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
        int error;
        /*
         * Directory attribute snapshots: bva = before the create,
         * iva = right after the creating VOP, iva2 = re-check after the
         * symlink lookup, ava = after the directory fsync.
         */
        struct vattr bva, iva, iva2, ava, *vap;
        cred_t *cr = cs->cr;
        vnode_t *dvp = cs->vp;
        vnode_t *vp = NULL;
        vnode_t *realvp;
        /* nm/lnm: decoded object name / link text; len/llen: their sizes */
        char *nm, *lnm;
        uint_t len, llen;
        /* flag passed to the final VOP_FSYNC on the new object */
        int syncval = 0;
        struct nfs4_svgetit_arg sarg;
        struct nfs4_ntov_table ntov;
        struct statvfs64 sb;
        nfsstat4 status;
        struct sockaddr *ca;
        /* name/lname: results of nfscmd_convname() on nm/lnm */
        char *name = NULL;
        char *lname = NULL;

        DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
            CREATE4args *, args);

        resp->attrset = 0;

        if (dvp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                goto out;
        }

        /*
         * If there is an unshared filesystem mounted on this vnode,
         * do not allow to create an object in this directory.
         */
        if (vn_ismntpt(dvp)) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        /* Verify that type is correct */
        switch (args->type) {
        case NF4LNK:
        case NF4BLK:
        case NF4CHR:
        case NF4SOCK:
        case NF4FIFO:
        case NF4DIR:
                break;
        default:
                *cs->statusp = resp->status = NFS4ERR_BADTYPE;
                goto out;
        };

        if (cs->access == CS_ACCESS_DENIED) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }
        if (dvp->v_type != VDIR) {
                *cs->statusp = resp->status = NFS4ERR_NOTDIR;
                goto out;
        }
        status = utf8_dir_verify(&args->objname);
        if (status != NFS4_OK) {
                *cs->statusp = resp->status = status;
                goto out;
        }

        if (rdonly4(req, cs)) {
                *cs->statusp = resp->status = NFS4ERR_ROFS;
                goto out;
        }

        /*
         * Name of newly created object
         *
         * From here on nm is an allocation of len bytes that every exit
         * path must release with kmem_free(nm, len).
         */
        nm = utf8_to_fn(&args->objname, &len, NULL);
        if (nm == NULL) {
                *cs->statusp = resp->status = NFS4ERR_INVAL;
                goto out;
        }

        if (len > MAXNAMELEN) {
                *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
                kmem_free(nm, len);
                goto out;
        }

        /*
         * Convert the name for this caller/export.  The result is either
         * nm itself or a separate buffer that the cleanup code below frees
         * as MAXPATHLEN + 1 bytes, which is why every cleanup path tests
         * "name != nm" before freeing it.
         */
        ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
        name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
            MAXPATHLEN  + 1);

        if (name == NULL) {
                *cs->statusp = resp->status = NFS4ERR_INVAL;
                kmem_free(nm, len);
                goto out;
        }

        /* Already cleared at the top of the function; repeated here. */
        resp->attrset = 0;

        sarg.sbp = &sb;
        sarg.is_referral = B_FALSE;
        /*
         * The ntov table is allocated here; every exit path from this
         * point on must call nfs4_ntov_table_free(&ntov, &sarg).
         */
        nfs4_ntov_table_init(&ntov);

        /* Decode createattrs into sarg.vap. */
        status = do_rfs4_set_attrs(&resp->attrset,
            &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);

        /* A create that ends up with no vattr bits set is rejected. */
        if (sarg.vap->va_mask == 0 && status == NFS4_OK)
                status = NFS4ERR_INVAL;

        if (status != NFS4_OK) {
                *cs->statusp = resp->status = status;
                if (name != nm)
                        kmem_free(name, MAXPATHLEN + 1);
                kmem_free(nm, len);
                nfs4_ntov_table_free(&ntov, &sarg);
                resp->attrset = 0;
                goto out;
        }

        /*
         * Get "before" change value
         *
         * AT_MODE is requested as well because the directory's
         * VSUID/VSGID bits are examined below.
         */
        bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
        error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
        if (error) {
                *cs->statusp = resp->status = puterrno4(error);
                if (name != nm)
                        kmem_free(name, MAXPATHLEN + 1);
                kmem_free(nm, len);
                nfs4_ntov_table_free(&ntov, &sarg);
                resp->attrset = 0;
                goto out;
        }
        NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

        vap = sarg.vap;

        /*
         * Set the default initial values for attributes when the parent
         * directory does not have the VSUID/VSGID bit set and they have
         * not been specified in createattrs.
         */
        if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
                vap->va_uid = crgetuid(cr);
                vap->va_mask |= AT_UID;
        }
        if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
                vap->va_gid = crgetgid(cr);
                vap->va_mask |= AT_GID;
        }

        /*
         * Create the object.  Each case that succeeds leaves the new
         * vnode in vp and the directory's post-create sequence number in
         * iva.va_seq (0 when it could not be read).  A case that breaks
         * out with error set is turned into a status after the switch.
         */
        vap->va_mask |= AT_TYPE;
        switch (args->type) {
        case NF4DIR:
                vap->va_type = VDIR;
                if ((vap->va_mask & AT_MODE) == 0) {
                        vap->va_mode = 0700; /* default: owner rwx only */
                        vap->va_mask |= AT_MODE;
                }
                error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
                if (error)
                        break;

                /*
                 * Get the initial "after" sequence number, if it fails,
                 * set to zero
                 */
                iva.va_mask = AT_SEQ;
                if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
                        iva.va_seq = 0;
                break;
        case NF4LNK:
                vap->va_type = VLNK;
                if ((vap->va_mask & AT_MODE) == 0) {
                        vap->va_mode = 0700; /* default: owner rwx only */
                        vap->va_mask |= AT_MODE;
                }

                /*
                 * symlink names must be treated as data
                 *
                 * lnm is an allocation of llen bytes, released with
                 * kmem_free(lnm, llen) on every path below.
                 */
                lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
                    &llen, NULL);

                if (lnm == NULL) {
                        *cs->statusp = resp->status = NFS4ERR_INVAL;
                        if (name != nm)
                                kmem_free(name, MAXPATHLEN + 1);
                        kmem_free(nm, len);
                        nfs4_ntov_table_free(&ntov, &sarg);
                        resp->attrset = 0;
                        goto out;
                }

                if (llen > MAXPATHLEN) {
                        *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
                        if (name != nm)
                                kmem_free(name, MAXPATHLEN + 1);
                        kmem_free(nm, len);
                        kmem_free(lnm, llen);
                        nfs4_ntov_table_free(&ntov, &sarg);
                        resp->attrset = 0;
                        goto out;
                }

                /* Same ownership rule as name/nm: lname may alias lnm. */
                lname = nfscmd_convname(ca, cs->exi, lnm,
                    NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);

                if (lname == NULL) {
                        *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
                        if (name != nm)
                                kmem_free(name, MAXPATHLEN + 1);
                        kmem_free(nm, len);
                        kmem_free(lnm, llen);
                        nfs4_ntov_table_free(&ntov, &sarg);
                        resp->attrset = 0;
                        goto out;
                }

                /* The link text is not needed once the symlink call returns. */
                error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
                if (lname != lnm)
                        kmem_free(lname, MAXPATHLEN + 1);
                kmem_free(lnm, llen);
                if (error)
                        break;

                /*
                 * Get the initial "after" sequence number, if it fails,
                 * set to zero
                 */
                iva.va_mask = AT_SEQ;
                if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
                        iva.va_seq = 0;

                /*
                 * VOP_SYMLINK is not handed a vnode pointer to fill in,
                 * so the new link is looked up by name to obtain vp.
                 */
                error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
                    NULL, NULL, NULL);
                if (error)
                        break;

                /*
                 * va_seq is not safe over VOP calls, check it again
                 * if it has changed zero out iva to force atomic = FALSE.
                 */
                iva2.va_mask = AT_SEQ;
                if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
                    iva2.va_seq != iva.va_seq)
                        iva.va_seq = 0;
                break;
        default:
                /*
                 * probably a special file.
                 */
                if ((vap->va_mask & AT_MODE) == 0) {
                        vap->va_mode = 0600; /* default: owner rw only */
                        vap->va_mask |= AT_MODE;
                }
                syncval = FNODSYNC;
                /*
                 * We know this will only generate one VOP call
                 */
                vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);

                /*
                 * A NULL return means failure; no status is stored here,
                 * so do_rfs4_op_mknod() is expected to have set
                 * *cs->statusp and resp->status itself.
                 */
                if (vp == NULL) {
                        if (name != nm)
                                kmem_free(name, MAXPATHLEN + 1);
                        kmem_free(nm, len);
                        nfs4_ntov_table_free(&ntov, &sarg);
                        resp->attrset = 0;
                        goto out;
                }

                /*
                 * Get the initial "after" sequence number, if it fails,
                 * set to zero
                 *
                 * error is still 0 here from the successful "before"
                 * VOP_GETATTR, so the check after the switch is a no-op
                 * for this case.
                 */
                iva.va_mask = AT_SEQ;
                if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
                        iva.va_seq = 0;

                break;
        }
        /* The name buffers are no longer needed whatever the outcome. */
        if (name != nm)
                kmem_free(name, MAXPATHLEN + 1);
        kmem_free(nm, len);

        if (error) {
                *cs->statusp = resp->status = puterrno4(error);
        }

        /*
         * Force modified data and metadata out to stable storage.
         */
        (void) VOP_FSYNC(dvp, 0, cr, NULL);

        /*
         * NOTE(review): on the success path nothing in this function has
         * assigned resp->status yet, so this test relies on the caller
         * passing in a result whose status already reads as NFS4_OK
         * (presumably a zeroed resop) -- confirm against the caller.
         */
        if (resp->status != NFS4_OK) {
                /* e.g. symlink created but the follow-up lookup failed */
                if (vp != NULL)
                        VN_RELE(vp);
                nfs4_ntov_table_free(&ntov, &sarg);
                resp->attrset = 0;
                goto out;
        }

        /*
         * Finish setup of cinfo response, "before" value already set.
         * Get "after" change value, if it fails, simply return the
         * before value.
         */
        ava.va_mask = AT_CTIME|AT_SEQ;
        if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
                ava.va_ctime = bva.va_ctime;
                ava.va_seq = 0;
        }
        NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

        /*
         * True verification that object was created with correct
         * attrs is impossible.  The attrs could have been changed
         * immediately after object creation.  If attributes did
         * not verify, the only recourse for the server is to
         * destroy the object.  Maybe if some attrs (like gid)
         * are set incorrectly, the object should be destroyed;
         * however, seems bad as a default policy.  Do we really
         * want to destroy an object over one of the times not
         * verifying correctly?  For these reasons, the server
         * currently sets bits in attrset for createattrs
         * that were set; however, no verification is done.
         *
         * vmask_to_nmask accounts for vattr bits set on create
         *      [do_rfs4_set_attrs() only sets resp bits for
         *       non-vattr/vfs bits.]
         * Mask off any bits set by default so as not to return
         * more attrset bits than were requested in createattrs
         */
        nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
        resp->attrset &= args->createattrs.attrmask;
        nfs4_ntov_table_free(&ntov, &sarg);

        /*
         * Build the filehandle for the new object in cs->fh.  A failure
         * is recorded in the status but processing continues so that the
         * fsync below still runs; vp is released after it.
         */
        error = makefh4(&cs->fh, vp, cs->exi);
        if (error) {
                *cs->statusp = resp->status = puterrno4(error);
        }

        /*
         * The cinfo.atomic = TRUE only if we got no errors, we have
         * non-zero va_seq's, and it has incremented by exactly one
         * during the creation and it didn't change during the VOP_LOOKUP
         * or VOP_FSYNC.
         */
        if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
            iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
                resp->cinfo.atomic = TRUE;
        else
                resp->cinfo.atomic = FALSE;

        /*
         * Force modified metadata out to stable storage.
         *
         * if a underlying vp exists, pass it to VOP_FSYNC
         *
         * syncval is FNODSYNC for special files and 0 otherwise.
         */
        if (VOP_REALVP(vp, &realvp, NULL) == 0)
                (void) VOP_FSYNC(realvp, syncval, cr, NULL);
        else
                (void) VOP_FSYNC(vp, syncval, cr, NULL);

        /* Only reachable with a bad status when makefh4() failed. */
        if (resp->status != NFS4_OK) {
                VN_RELE(vp);
                goto out;
        }
        /* Drop the hold on the directory; the new object becomes current. */
        if (cs->vp)
                VN_RELE(cs->vp);

        cs->vp = vp;
        *cs->statusp = resp->status = NFS4_OK;
out:
        DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
            CREATE4res *, resp);
}
1982 
1983 /*ARGSUSED*/
1984 static void
1985 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1986     struct compound_state *cs)
1987 {
1988         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1989             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1990 
1991         rfs4_op_inval(argop, resop, req, cs);
1992 
1993         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1994             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1995 }
1996 
1997 /*ARGSUSED*/
1998 static void
1999 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2000     struct compound_state *cs)
2001 {
2002         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2003         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2004         rfs4_deleg_state_t *dsp;
2005         nfsstat4 status;
2006 
2007         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2008             DELEGRETURN4args *, args);
2009 
2010         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2011         resp->status = *cs->statusp = status;
2012         if (status != NFS4_OK)
2013                 goto out;
2014 
2015         /* Ensure specified filehandle matches */
2016         if (cs->vp != dsp->rds_finfo->rf_vp) {
2017                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2018         } else
2019                 rfs4_return_deleg(dsp, FALSE);
2020 
2021         rfs4_update_lease(dsp->rds_client);
2022 
2023         rfs4_deleg_state_rele(dsp);
2024 out:
2025         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2026             DELEGRETURN4res *, resp);
2027 }
2028 
2029 /*
2030  * Check to see if a given "flavor" is an explicitly shared flavor.
2031  * The assumption of this routine is the "flavor" is already a valid
2032  * flavor in the secinfo list of "exi".
2033  *
2034  *      e.g.
2035  *              # share -o sec=flavor1 /export
2036  *              # share -o sec=flavor2 /export/home
2037  *
2038  *              flavor2 is not an explicitly shared flavor for /export,
2039  *              however it is in the secinfo list for /export thru the
2040  *              server namespace setup.
2041  */
2042 int
2043 is_exported_sec(int flavor, struct exportinfo *exi)
2044 {
2045         int     i;
2046         struct secinfo *sp;
2047 
2048         sp = exi->exi_export.ex_secinfo;
2049         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2050                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2051                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2052                         return (SEC_REF_EXPORTED(&sp[i]));
2053                 }
2054         }
2055 
2056         /* Should not reach this point based on the assumption */
2057         return (0);
2058 }
2059 
2060 /*
2061  * Check if the security flavor used in the request matches what is
2062  * required at the export point or at the root pseudo node (exi_root).
2063  *
2064  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2065  *
2066  */
2067 static int
2068 secinfo_match_or_authnone(struct compound_state *cs)
2069 {
2070         int     i;
2071         struct secinfo *sp;
2072 
2073         /*
2074          * Check cs->nfsflavor (from the request) against
2075          * the current export data in cs->exi.
2076          */
2077         sp = cs->exi->exi_export.ex_secinfo;
2078         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2079                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2080                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2081                         return (1);
2082         }
2083 
2084         return (0);
2085 }
2086 
2087 /*
2088  * Check the access authority for the client and return the correct error.
2089  */
2090 nfsstat4
2091 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2092 {
2093         int     authres;
2094 
2095         /*
2096          * First, check if the security flavor used in the request
2097          * are among the flavors set in the server namespace.
2098          */
2099         if (!secinfo_match_or_authnone(cs)) {
2100                 *cs->statusp = NFS4ERR_WRONGSEC;
2101                 return (*cs->statusp);
2102         }
2103 
2104         authres = checkauth4(cs, req);
2105 
2106         if (authres > 0) {
2107                 *cs->statusp = NFS4_OK;
2108                 if (! (cs->access & CS_ACCESS_LIMITED))
2109                         cs->access = CS_ACCESS_OK;
2110         } else if (authres == 0) {
2111                 *cs->statusp = NFS4ERR_ACCESS;
2112         } else if (authres == -2) {
2113                 *cs->statusp = NFS4ERR_WRONGSEC;
2114         } else {
2115                 *cs->statusp = NFS4ERR_DELAY;
2116         }
2117         return (*cs->statusp);
2118 }
2119 
2120 /*
2121  * bitmap4_to_attrmask is called by getattr and readdir.
2122  * It sets up the vattr mask and determines whether vfsstat call is needed
2123  * based on the input bitmap.
2124  * Returns nfsv4 status.
2125  */
2126 static nfsstat4
2127 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2128 {
2129         int i;
2130         uint_t  va_mask;
2131         struct statvfs64 *sbp = sargp->sbp;
2132 
2133         sargp->sbp = NULL;
2134         sargp->flag = 0;
2135         sargp->rdattr_error = NFS4_OK;
2136         sargp->mntdfid_set = FALSE;
2137         if (sargp->cs->vp)
2138                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2139                     FH4_ATTRDIR | FH4_NAMEDATTR);
2140         else
2141                 sargp->xattr = 0;
2142 
2143         /*
2144          * Set rdattr_error_req to true if return error per
2145          * failed entry rather than fail the readdir.
2146          */
2147         if (breq & FATTR4_RDATTR_ERROR_MASK)
2148                 sargp->rdattr_error_req = 1;
2149         else
2150                 sargp->rdattr_error_req = 0;
2151 
2152         /*
2153          * generate the va_mask
2154          * Handle the easy cases first
2155          */
2156         switch (breq) {
2157         case NFS4_NTOV_ATTR_MASK:
2158                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2159                 return (NFS4_OK);
2160 
2161         case NFS4_FS_ATTR_MASK:
2162                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2163                 sargp->sbp = sbp;
2164                 return (NFS4_OK);
2165 
2166         case NFS4_NTOV_ATTR_CACHE_MASK:
2167                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2168                 return (NFS4_OK);
2169 
2170         case FATTR4_LEASE_TIME_MASK:
2171                 sargp->vap->va_mask = 0;
2172                 return (NFS4_OK);
2173 
2174         default:
2175                 va_mask = 0;
2176                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2177                         if ((breq & nfs4_ntov_map[i].fbit) &&
2178                             nfs4_ntov_map[i].vbit)
2179                                 va_mask |= nfs4_ntov_map[i].vbit;
2180                 }
2181 
2182                 /*
2183                  * Check is vfsstat is needed
2184                  */
2185                 if (breq & NFS4_FS_ATTR_MASK)
2186                         sargp->sbp = sbp;
2187 
2188                 sargp->vap->va_mask = va_mask;
2189                 return (NFS4_OK);
2190         }
2191         /* NOTREACHED */
2192 }
2193 
2194 /*
2195  * bitmap4_get_sysattrs is called by getattr and readdir.
2196  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2197  * Returns nfsv4 status.
2198  */
2199 static nfsstat4
2200 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2201 {
2202         int error;
2203         struct compound_state *cs = sargp->cs;
2204         vnode_t *vp = cs->vp;
2205 
2206         if (sargp->sbp != NULL) {
2207                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2208                         sargp->sbp = NULL;   /* to identify error */
2209                         return (puterrno4(error));
2210                 }
2211         }
2212 
2213         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2214 }
2215 
2216 static void
2217 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2218 {
2219         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2220             KM_SLEEP);
2221         ntovp->attrcnt = 0;
2222         ntovp->vfsstat = FALSE;
2223 }
2224 
2225 static void
2226 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2227     struct nfs4_svgetit_arg *sargp)
2228 {
2229         int i;
2230         union nfs4_attr_u *na;
2231         uint8_t *amap;
2232 
2233         /*
2234          * XXX Should do the same checks for whether the bit is set
2235          */
2236         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2237             i < ntovp->attrcnt; i++, na++, amap++) {
2238                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2239                     NFS4ATTR_FREEIT, sargp, na);
2240         }
2241         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2242                 /*
2243                  * xdr_free for getattr will be done later
2244                  */
2245                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2246                     i < ntovp->attrcnt; i++, na++, amap++) {
2247                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2248                 }
2249         }
2250         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2251 }
2252 
2253 /*
2254  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2255  */
2256 static nfsstat4
2257 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2258     struct nfs4_svgetit_arg *sargp)
2259 {
2260         int error = 0;
2261         int i, k;
2262         struct nfs4_ntov_table ntov;
2263         XDR xdr;
2264         ulong_t xdr_size;
2265         char *xdr_attrs;
2266         nfsstat4 status = NFS4_OK;
2267         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2268         union nfs4_attr_u *na;
2269         uint8_t *amap;
2270 
2271         sargp->op = NFS4ATTR_GETIT;
2272         sargp->flag = 0;
2273 
2274         fattrp->attrmask = 0;
2275         /* if no bits requested, then return empty fattr4 */
2276         if (breq == 0) {
2277                 fattrp->attrlist4_len = 0;
2278                 fattrp->attrlist4 = NULL;
2279                 return (NFS4_OK);
2280         }
2281 
2282         /*
2283          * return NFS4ERR_INVAL when client requests write-only attrs
2284          */
2285         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2286                 return (NFS4ERR_INVAL);
2287 
2288         nfs4_ntov_table_init(&ntov);
2289         na = ntov.na;
2290         amap = ntov.amap;
2291 
2292         /*
2293          * Now loop to get or verify the attrs
2294          */
2295         for (i = 0; i < nfs4_ntov_map_size; i++) {
2296                 if (breq & nfs4_ntov_map[i].fbit) {
2297                         if ((*nfs4_ntov_map[i].sv_getit)(
2298                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2299 
2300                                 error = (*nfs4_ntov_map[i].sv_getit)(
2301                                     NFS4ATTR_GETIT, sargp, na);
2302 
2303                                 /*
2304                                  * Possible error values:
2305                                  * >0 if sv_getit failed to
2306                                  * get the attr; 0 if succeeded;
2307                                  * <0 if rdattr_error and the
2308                                  * attribute cannot be returned.
2309                                  */
2310                                 if (error && !(sargp->rdattr_error_req))
2311                                         goto done;
2312                                 /*
2313                                  * If error then just for entry
2314                                  */
2315                                 if (error == 0) {
2316                                         fattrp->attrmask |=
2317                                             nfs4_ntov_map[i].fbit;
2318                                         *amap++ =
2319                                             (uint8_t)nfs4_ntov_map[i].nval;
2320                                         na++;
2321                                         (ntov.attrcnt)++;
2322                                 } else if ((error > 0) &&
2323                                     (sargp->rdattr_error == NFS4_OK)) {
2324                                         sargp->rdattr_error = puterrno4(error);
2325                                 }
2326                                 error = 0;
2327                         }
2328                 }
2329         }
2330 
2331         /*
2332          * If rdattr_error was set after the return value for it was assigned,
2333          * update it.
2334          */
2335         if (prev_rdattr_error != sargp->rdattr_error) {
2336                 na = ntov.na;
2337                 amap = ntov.amap;
2338                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2339                         k = *amap;
2340                         if (k < FATTR4_RDATTR_ERROR) {
2341                                 continue;
2342                         }
2343                         if ((k == FATTR4_RDATTR_ERROR) &&
2344                             ((*nfs4_ntov_map[k].sv_getit)(
2345                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2346 
2347                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2348                                     NFS4ATTR_GETIT, sargp, na);
2349                         }
2350                         break;
2351                 }
2352         }
2353 
2354         xdr_size = 0;
2355         na = ntov.na;
2356         amap = ntov.amap;
2357         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2358                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2359         }
2360 
2361         fattrp->attrlist4_len = xdr_size;
2362         if (xdr_size) {
2363                 /* freed by rfs4_op_getattr_free() */
2364                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2365 
2366                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2367 
2368                 na = ntov.na;
2369                 amap = ntov.amap;
2370                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2371                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2372                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2373                                     int, *amap);
2374                                 status = NFS4ERR_SERVERFAULT;
2375                                 break;
2376                         }
2377                 }
2378                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2379         } else {
2380                 fattrp->attrlist4 = NULL;
2381         }
2382 done:
2383 
2384         nfs4_ntov_table_free(&ntov, sargp);
2385 
2386         if (error != 0)
2387                 status = puterrno4(error);
2388 
2389         return (status);
2390 }
2391 
2392 /* ARGSUSED */
2393 static void
2394 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2395     struct compound_state *cs)
2396 {
2397         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2398         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2399         struct nfs4_svgetit_arg sarg;
2400         struct statvfs64 sb;
2401         nfsstat4 status;
2402 
2403         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2404             GETATTR4args *, args);
2405 
2406         if (cs->vp == NULL) {
2407                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2408                 goto out;
2409         }
2410 
2411         if (cs->access == CS_ACCESS_DENIED) {
2412                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2413                 goto out;
2414         }
2415 
2416         sarg.sbp = &sb;
2417         sarg.cs = cs;
2418         sarg.is_referral = B_FALSE;
2419 
2420         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2421         if (status == NFS4_OK) {
2422 
2423                 status = bitmap4_get_sysattrs(&sarg);
2424                 if (status == NFS4_OK) {
2425 
2426                         /* Is this a referral? */
2427                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2428                                 /* Older V4 Solaris client sees a link */
2429                                 if (client_is_downrev(req))
2430                                         sarg.vap->va_type = VLNK;
2431                                 else
2432                                         sarg.is_referral = B_TRUE;
2433                         }
2434 
2435                         status = do_rfs4_op_getattr(args->attr_request,
2436                             &resp->obj_attributes, &sarg);
2437                 }
2438         }
2439         *cs->statusp = resp->status = status;
2440 out:
2441         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2442             GETATTR4res *, resp);
2443 }
2444 
2445 static void
2446 rfs4_op_getattr_free(nfs_resop4 *resop)
2447 {
2448         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2449 
2450         nfs4_fattr4_free(&resp->obj_attributes);
2451 }
2452 
2453 /* ARGSUSED */
2454 static void
2455 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2456     struct compound_state *cs)
2457 {
2458         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2459 
2460         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2461 
2462         if (cs->vp == NULL) {
2463                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464                 goto out;
2465         }
2466         if (cs->access == CS_ACCESS_DENIED) {
2467                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2468                 goto out;
2469         }
2470 
2471         /* check for reparse point at the share point */
2472         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2473                 /* it's all bad */
2474                 cs->exi->exi_moved = 1;
2475                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2476                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2477                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2478                 return;
2479         }
2480 
2481         /* check for reparse point at vp */
2482         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2483                 /* it's not all bad */
2484                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2485                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2486                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2487                 return;
2488         }
2489 
2490         resp->object.nfs_fh4_val =
2491             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2492         nfs_fh4_copy(&cs->fh, &resp->object);
2493         *cs->statusp = resp->status = NFS4_OK;
2494 out:
2495         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2496             GETFH4res *, resp);
2497 }
2498 
2499 static void
2500 rfs4_op_getfh_free(nfs_resop4 *resop)
2501 {
2502         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2503 
2504         if (resp->status == NFS4_OK &&
2505             resp->object.nfs_fh4_val != NULL) {
2506                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2507                 resp->object.nfs_fh4_val = NULL;
2508                 resp->object.nfs_fh4_len = 0;
2509         }
2510 }
2511 
2512 /*
2513  * illegal: args: void
2514  *          res : status (NFS4ERR_OP_ILLEGAL)
2515  */
2516 /* ARGSUSED */
2517 static void
2518 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2519     struct svc_req *req, struct compound_state *cs)
2520 {
2521         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2522 
2523         resop->resop = OP_ILLEGAL;
2524         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2525 }
2526 
2527 /*
2528  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2529  *       res: status. If success - CURRENT_FH unchanged, return change_info
2530  */
2531 /* ARGSUSED */
2532 static void
2533 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2534     struct compound_state *cs)
2535 {
2536         LINK4args *args = &argop->nfs_argop4_u.oplink;
2537         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2538         int error;
2539         vnode_t *vp;
2540         vnode_t *dvp;
2541         struct vattr bdva, idva, adva;
2542         char *nm;
2543         uint_t  len;
2544         struct sockaddr *ca;
2545         char *name = NULL;
2546         nfsstat4 status;
2547 
2548         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2549             LINK4args *, args);
2550 
2551         /* SAVED_FH: source object */
2552         vp = cs->saved_vp;
2553         if (vp == NULL) {
2554                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2555                 goto out;
2556         }
2557 
2558         /* CURRENT_FH: target directory */
2559         dvp = cs->vp;
2560         if (dvp == NULL) {
2561                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2562                 goto out;
2563         }
2564 
2565         /*
2566          * If there is a non-shared filesystem mounted on this vnode,
2567          * do not allow to link any file in this directory.
2568          */
2569         if (vn_ismntpt(dvp)) {
2570                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2571                 goto out;
2572         }
2573 
2574         if (cs->access == CS_ACCESS_DENIED) {
2575                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2576                 goto out;
2577         }
2578 
2579         /* Check source object's type validity */
2580         if (vp->v_type == VDIR) {
2581                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2582                 goto out;
2583         }
2584 
2585         /* Check target directory's type */
2586         if (dvp->v_type != VDIR) {
2587                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2588                 goto out;
2589         }
2590 
2591         if (cs->saved_exi != cs->exi) {
2592                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2593                 goto out;
2594         }
2595 
2596         status = utf8_dir_verify(&args->newname);
2597         if (status != NFS4_OK) {
2598                 *cs->statusp = resp->status = status;
2599                 goto out;
2600         }
2601 
2602         nm = utf8_to_fn(&args->newname, &len, NULL);
2603         if (nm == NULL) {
2604                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2605                 goto out;
2606         }
2607 
2608         if (len > MAXNAMELEN) {
2609                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2610                 kmem_free(nm, len);
2611                 goto out;
2612         }
2613 
2614         if (rdonly4(req, cs)) {
2615                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2616                 kmem_free(nm, len);
2617                 goto out;
2618         }
2619 
2620         /* Get "before" change value */
2621         bdva.va_mask = AT_CTIME|AT_SEQ;
2622         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2623         if (error) {
2624                 *cs->statusp = resp->status = puterrno4(error);
2625                 kmem_free(nm, len);
2626                 goto out;
2627         }
2628 
2629         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2630         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2631             MAXPATHLEN  + 1);
2632 
2633         if (name == NULL) {
2634                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2635                 kmem_free(nm, len);
2636                 goto out;
2637         }
2638 
2639         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2640 
2641         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2642 
2643         if (nm != name)
2644                 kmem_free(name, MAXPATHLEN + 1);
2645         kmem_free(nm, len);
2646 
2647         /*
2648          * Get the initial "after" sequence number, if it fails, set to zero
2649          */
2650         idva.va_mask = AT_SEQ;
2651         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2652                 idva.va_seq = 0;
2653 
2654         /*
2655          * Force modified data and metadata out to stable storage.
2656          */
2657         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2658         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2659 
2660         if (error) {
2661                 *cs->statusp = resp->status = puterrno4(error);
2662                 goto out;
2663         }
2664 
2665         /*
2666          * Get "after" change value, if it fails, simply return the
2667          * before value.
2668          */
2669         adva.va_mask = AT_CTIME|AT_SEQ;
2670         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2671                 adva.va_ctime = bdva.va_ctime;
2672                 adva.va_seq = 0;
2673         }
2674 
2675         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2676 
2677         /*
2678          * The cinfo.atomic = TRUE only if we have
2679          * non-zero va_seq's, and it has incremented by exactly one
2680          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2681          */
2682         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2683             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2684                 resp->cinfo.atomic = TRUE;
2685         else
2686                 resp->cinfo.atomic = FALSE;
2687 
2688         *cs->statusp = resp->status = NFS4_OK;
2689 out:
2690         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2691             LINK4res *, resp);
2692 }
2693 
2694 /*
2695  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2696  */
2697 
2698 /* ARGSUSED */
2699 static nfsstat4
2700 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2701 {
2702         int error;
2703         int different_export = 0;
2704         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2705         struct exportinfo *exi = NULL, *pre_exi = NULL;
2706         nfsstat4 stat;
2707         fid_t fid;
2708         int attrdir, dotdot, walk;
2709         bool_t is_newvp = FALSE;
2710 
2711         if (cs->vp->v_flag & V_XATTRDIR) {
2712                 attrdir = 1;
2713                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2714         } else {
2715                 attrdir = 0;
2716                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2717         }
2718 
2719         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2720 
2721         /*
2722          * If dotdotting, then need to check whether it's
2723          * above the root of a filesystem, or above an
2724          * export point.
2725          */
2726         if (dotdot) {
2727 
2728                 /*
2729                  * If dotdotting at the root of a filesystem, then
2730                  * need to traverse back to the mounted-on filesystem
2731                  * and do the dotdot lookup there.
2732                  */
2733                 if (cs->vp->v_flag & VROOT) {
2734 
2735                         /*
2736                          * If at the system root, then can
2737                          * go up no further.
2738                          */
2739                         if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2740                                 return (puterrno4(ENOENT));
2741 
2742                         /*
2743                          * Traverse back to the mounted-on filesystem
2744                          */
2745                         cs->vp = untraverse(cs->vp);
2746 
2747                         /*
2748                          * Set the different_export flag so we remember
2749                          * to pick up a new exportinfo entry for
2750                          * this new filesystem.
2751                          */
2752                         different_export = 1;
2753                 } else {
2754 
2755                         /*
2756                          * If dotdotting above an export point then set
2757                          * the different_export to get new export info.
2758                          */
2759                         different_export = nfs_exported(cs->exi, cs->vp);
2760                 }
2761         }
2762 
2763         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2764             NULL, NULL, NULL);
2765         if (error)
2766                 return (puterrno4(error));
2767 
2768         /*
2769          * If the vnode is in a pseudo filesystem, check whether it is visible.
2770          *
2771          * XXX if the vnode is a symlink and it is not visible in
2772          * a pseudo filesystem, return ENOENT (not following symlink).
2773          * V4 client can not mount such symlink. This is a regression
2774          * from V2/V3.
2775          *
2776          * In the same exported filesystem, if the security flavor used
2777          * is not an explicitly shared flavor, limit the view to the visible
2778          * list entries only. This is not a WRONGSEC case because it's already
2779          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2780          */
2781         if (!different_export &&
2782             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2783             cs->access & CS_ACCESS_LIMITED)) {
2784                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2785                         VN_RELE(vp);
2786                         return (puterrno4(ENOENT));
2787                 }
2788         }
2789 
2790         /*
2791          * If it's a mountpoint, then traverse it.
2792          */
2793         if (vn_ismntpt(vp)) {
2794                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2795                 pre_tvp = vp;           /* save pre-traversed vnode     */
2796 
2797                 /*
2798                  * hold pre_tvp to counteract rele by traverse.  We will
2799                  * need pre_tvp below if checkexport4 fails
2800                  */
2801                 VN_HOLD(pre_tvp);
2802                 if ((error = traverse(&vp)) != 0) {
2803                         VN_RELE(vp);
2804                         VN_RELE(pre_tvp);
2805                         return (puterrno4(error));
2806                 }
2807                 different_export = 1;
2808         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2809                 /*
2810                  * The vfsp comparison is to handle the case where
2811                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2812                  * and NFS is unaware of local fs transistions because
2813                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2814                  * the dir and the obj returned by lookup will have different
2815                  * vfs ptrs.
2816                  */
2817                 different_export = 1;
2818         }
2819 
2820         if (different_export) {
2821 
2822                 bzero(&fid, sizeof (fid));
2823                 fid.fid_len = MAXFIDSZ;
2824                 error = vop_fid_pseudo(vp, &fid);
2825                 if (error) {
2826                         VN_RELE(vp);
2827                         if (pre_tvp)
2828                                 VN_RELE(pre_tvp);
2829                         return (puterrno4(error));
2830                 }
2831 
2832                 if (dotdot)
2833                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2834                 else
2835                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2836 
2837                 if (exi == NULL) {
2838                         if (pre_tvp) {
2839                                 /*
2840                                  * If this vnode is a mounted-on vnode,
2841                                  * but the mounted-on file system is not
2842                                  * exported, send back the filehandle for
2843                                  * the mounted-on vnode, not the root of
2844                                  * the mounted-on file system.
2845                                  */
2846                                 VN_RELE(vp);
2847                                 vp = pre_tvp;
2848                                 exi = pre_exi;
2849                         } else {
2850                                 VN_RELE(vp);
2851                                 return (puterrno4(EACCES));
2852                         }
2853                 } else if (pre_tvp) {
2854                         /* we're done with pre_tvp now. release extra hold */
2855                         VN_RELE(pre_tvp);
2856                 }
2857 
2858                 cs->exi = exi;
2859 
2860                 /*
2861                  * Now we do a checkauth4. The reason is that
2862                  * this client/user may not have access to the new
2863                  * exported file system, and if they do,
2864                  * the client/user may be mapped to a different uid.
2865                  *
2866                  * We start with a new cr, because the checkauth4 done
2867                  * in the PUT*FH operation over wrote the cred's uid,
2868                  * gid, etc, and we want the real thing before calling
2869                  * checkauth4()
2870                  */
2871                 crfree(cs->cr);
2872                 cs->cr = crdup(cs->basecr);
2873 
2874                 oldvp = cs->vp;
2875                 cs->vp = vp;
2876                 is_newvp = TRUE;
2877 
2878                 stat = call_checkauth4(cs, req);
2879                 if (stat != NFS4_OK) {
2880                         VN_RELE(cs->vp);
2881                         cs->vp = oldvp;
2882                         return (stat);
2883                 }
2884         }
2885 
2886         /*
2887          * After various NFS checks, do a label check on the path
2888          * component. The label on this path should either be the
2889          * global zone's label or a zone's label. We are only
2890          * interested in the zone's label because exported files
2891          * in global zone is accessible (though read-only) to
2892          * clients. The exportability/visibility check is already
2893          * done before reaching this code.
2894          */
2895         if (is_system_labeled()) {
2896                 bslabel_t *clabel;
2897 
2898                 ASSERT(req->rq_label != NULL);
2899                 clabel = req->rq_label;
2900                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2901                     "got client label from request(1)", struct svc_req *, req);
2902 
2903                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2904                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2905                             cs->exi)) {
2906                                 error = EACCES;
2907                                 goto err_out;
2908                         }
2909                 } else {
2910                         /*
2911                          * We grant access to admin_low label clients
2912                          * only if the client is trusted, i.e. also
2913                          * running Solaris Trusted Extension.
2914                          */
2915                         struct sockaddr *ca;
2916                         int             addr_type;
2917                         void            *ipaddr;
2918                         tsol_tpc_t      *tp;
2919 
2920                         ca = (struct sockaddr *)svc_getrpccaller(
2921                             req->rq_xprt)->buf;
2922                         if (ca->sa_family == AF_INET) {
2923                                 addr_type = IPV4_VERSION;
2924                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2925                         } else if (ca->sa_family == AF_INET6) {
2926                                 addr_type = IPV6_VERSION;
2927                                 ipaddr = &((struct sockaddr_in6 *)
2928                                     ca)->sin6_addr;
2929                         }
2930                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2931                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2932                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2933                             SUN_CIPSO) {
2934                                 if (tp != NULL)
2935                                         TPC_RELE(tp);
2936                                 error = EACCES;
2937                                 goto err_out;
2938                         }
2939                         TPC_RELE(tp);
2940                 }
2941         }
2942 
2943         error = makefh4(&cs->fh, vp, cs->exi);
2944 
2945 err_out:
2946         if (error) {
2947                 if (is_newvp) {
2948                         VN_RELE(cs->vp);
2949                         cs->vp = oldvp;
2950                 } else
2951                         VN_RELE(vp);
2952                 return (puterrno4(error));
2953         }
2954 
2955         if (!is_newvp) {
2956                 if (cs->vp)
2957                         VN_RELE(cs->vp);
2958                 cs->vp = vp;
2959         } else if (oldvp)
2960                 VN_RELE(oldvp);
2961 
2962         /*
2963          * if did lookup on attrdir and didn't lookup .., set named
2964          * attr fh flag
2965          */
2966         if (attrdir && ! dotdot)
2967                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2968 
2969         /* Assume false for now, open proc will set this */
2970         cs->mandlock = FALSE;
2971 
2972         return (NFS4_OK);
2973 }
2974 
2975 /* ARGSUSED */
2976 static void
2977 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2978     struct compound_state *cs)
2979 {
2980         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2981         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2982         char *nm;
2983         uint_t len;
2984         struct sockaddr *ca;
2985         char *name = NULL;
2986         nfsstat4 status;
2987 
2988         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2989             LOOKUP4args *, args);
2990 
2991         if (cs->vp == NULL) {
2992                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2993                 goto out;
2994         }
2995 
2996         if (cs->vp->v_type == VLNK) {
2997                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2998                 goto out;
2999         }
3000 
3001         if (cs->vp->v_type != VDIR) {
3002                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3003                 goto out;
3004         }
3005 
3006         status = utf8_dir_verify(&args->objname);
3007         if (status != NFS4_OK) {
3008                 *cs->statusp = resp->status = status;
3009                 goto out;
3010         }
3011 
3012         nm = utf8_to_str(&args->objname, &len, NULL);
3013         if (nm == NULL) {
3014                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3015                 goto out;
3016         }
3017 
3018         if (len > MAXNAMELEN) {
3019                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3020                 kmem_free(nm, len);
3021                 goto out;
3022         }
3023 
3024         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3025         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3026             MAXPATHLEN  + 1);
3027 
3028         if (name == NULL) {
3029                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3030                 kmem_free(nm, len);
3031                 goto out;
3032         }
3033 
3034         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3035 
3036         if (name != nm)
3037                 kmem_free(name, MAXPATHLEN + 1);
3038         kmem_free(nm, len);
3039 
3040 out:
3041         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3042             LOOKUP4res *, resp);
3043 }
3044 
3045 /* ARGSUSED */
3046 static void
3047 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3048     struct compound_state *cs)
3049 {
3050         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3051 
3052         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3053 
3054         if (cs->vp == NULL) {
3055                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3056                 goto out;
3057         }
3058 
3059         if (cs->vp->v_type != VDIR) {
3060                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3061                 goto out;
3062         }
3063 
3064         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3065 
3066         /*
3067          * From NFSV4 Specification, LOOKUPP should not check for
3068          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3069          */
3070         if (resp->status == NFS4ERR_WRONGSEC) {
3071                 *cs->statusp = resp->status = NFS4_OK;
3072         }
3073 
3074 out:
3075         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3076             LOOKUPP4res *, resp);
3077 }
3078 
3079 
3080 /*ARGSUSED2*/
3081 static void
3082 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3083     struct compound_state *cs)
3084 {
3085         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3086         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3087         vnode_t         *avp = NULL;
3088         int             lookup_flags = LOOKUP_XATTR, error;
3089         int             exp_ro = 0;
3090 
3091         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3092             OPENATTR4args *, args);
3093 
3094         if (cs->vp == NULL) {
3095                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3096                 goto out;
3097         }
3098 
3099         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3100             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3101                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3102                 goto out;
3103         }
3104 
3105         /*
3106          * If file system supports passing ACE mask to VOP_ACCESS then
3107          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3108          */
3109 
3110         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3111                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3112                     V_ACE_MASK, cs->cr, NULL);
3113         else
3114                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3115                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3116                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3117 
3118         if (error) {
3119                 *cs->statusp = resp->status = puterrno4(EACCES);
3120                 goto out;
3121         }
3122 
3123         /*
3124          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3125          * the file system is exported read-only -- regardless of
3126          * createdir flag.  Otherwise the attrdir would be created
3127          * (assuming server fs isn't mounted readonly locally).  If
3128          * VOP_LOOKUP returns ENOENT in this case, the error will
3129          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3130          * because specfs has no VOP_LOOKUP op, so the macro would
3131          * return ENOSYS.  EINVAL is returned by all (current)
3132          * Solaris file system implementations when any of their
3133          * restrictions are violated (xattr(dir) can't have xattrdir).
3134          * Returning NOTSUPP is more appropriate in this case
3135          * because the object will never be able to have an attrdir.
3136          */
3137         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3138                 lookup_flags |= CREATE_XATTR_DIR;
3139 
3140         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3141             NULL, NULL, NULL);
3142 
3143         if (error) {
3144                 if (error == ENOENT && args->createdir && exp_ro)
3145                         *cs->statusp = resp->status = puterrno4(EROFS);
3146                 else if (error == EINVAL || error == ENOSYS)
3147                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3148                 else
3149                         *cs->statusp = resp->status = puterrno4(error);
3150                 goto out;
3151         }
3152 
3153         ASSERT(avp->v_flag & V_XATTRDIR);
3154 
3155         error = makefh4(&cs->fh, avp, cs->exi);
3156 
3157         if (error) {
3158                 VN_RELE(avp);
3159                 *cs->statusp = resp->status = puterrno4(error);
3160                 goto out;
3161         }
3162 
3163         VN_RELE(cs->vp);
3164         cs->vp = avp;
3165 
3166         /*
3167          * There is no requirement for an attrdir fh flag
3168          * because the attrdir has a vnode flag to distinguish
3169          * it from regular (non-xattr) directories.  The
3170          * FH4_ATTRDIR flag is set for future sanity checks.
3171          */
3172         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3173         *cs->statusp = resp->status = NFS4_OK;
3174 
3175 out:
3176         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3177             OPENATTR4res *, resp);
3178 }
3179 
3180 static int
3181 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3182     caller_context_t *ct)
3183 {
3184         int error;
3185         int i;
3186         clock_t delaytime;
3187 
3188         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3189 
3190         /*
3191          * Don't block on mandatory locks. If this routine returns
3192          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3193          */
3194         uio->uio_fmode = FNONBLOCK;
3195 
3196         for (i = 0; i < rfs4_maxlock_tries; i++) {
3197 
3198 
3199                 if (direction == FREAD) {
3200                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3201                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3202                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3203                 } else {
3204                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3205                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3206                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3207                 }
3208 
3209                 if (error != EAGAIN)
3210                         break;
3211 
3212                 if (i < rfs4_maxlock_tries - 1) {
3213                         delay(delaytime);
3214                         delaytime *= 2;
3215                 }
3216         }
3217 
3218         return (error);
3219 }
3220 
3221 /* ARGSUSED */
3222 static void
3223 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3224     struct compound_state *cs)
3225 {
3226         READ4args *args = &argop->nfs_argop4_u.opread;
3227         READ4res *resp = &resop->nfs_resop4_u.opread;
3228         int error;
3229         int verror;
3230         vnode_t *vp;
3231         struct vattr va;
3232         struct iovec iov, *iovp = NULL;
3233         int iovcnt;
3234         struct uio uio;
3235         u_offset_t offset;
3236         bool_t *deleg = &cs->deleg;
3237         nfsstat4 stat;
3238         int in_crit = 0;
3239         mblk_t *mp = NULL;
3240         int alloc_err = 0;
3241         int rdma_used = 0;
3242         int loaned_buffers;
3243         caller_context_t ct;
3244         struct uio *uiop;
3245 
3246         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3247             READ4args, args);
3248 
3249         vp = cs->vp;
3250         if (vp == NULL) {
3251                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3252                 goto out;
3253         }
3254         if (cs->access == CS_ACCESS_DENIED) {
3255                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3256                 goto out;
3257         }
3258 
3259         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3260             deleg, TRUE, &ct)) != NFS4_OK) {
3261                 *cs->statusp = resp->status = stat;
3262                 goto out;
3263         }
3264 
3265         /*
3266          * Enter the critical region before calling VOP_RWLOCK
3267          * to avoid a deadlock with write requests.
3268          */
3269         if (nbl_need_check(vp)) {
3270                 nbl_start_crit(vp, RW_READER);
3271                 in_crit = 1;
3272                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3273                     &ct)) {
3274                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3275                         goto out;
3276                 }
3277         }
3278 
3279         if (args->wlist) {
3280                 if (args->count > clist_len(args->wlist)) {
3281                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3282                         goto out;
3283                 }
3284                 rdma_used = 1;
3285         }
3286 
3287         /* use loaned buffers for TCP */
3288         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3289 
3290         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3291         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3292 
3293         /*
3294          * If we can't get the attributes, then we can't do the
3295          * right access checking.  So, we'll fail the request.
3296          */
3297         if (verror) {
3298                 *cs->statusp = resp->status = puterrno4(verror);
3299                 goto out;
3300         }
3301 
3302         if (vp->v_type != VREG) {
3303                 *cs->statusp = resp->status =
3304                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3305                 goto out;
3306         }
3307 
3308         if (crgetuid(cs->cr) != va.va_uid &&
3309             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3310             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3311                 *cs->statusp = resp->status = puterrno4(error);
3312                 goto out;
3313         }
3314 
3315         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3316                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3317                 goto out;
3318         }
3319 
3320         offset = args->offset;
3321         if (offset >= va.va_size) {
3322                 *cs->statusp = resp->status = NFS4_OK;
3323                 resp->eof = TRUE;
3324                 resp->data_len = 0;
3325                 resp->data_val = NULL;
3326                 resp->mblk = NULL;
3327                 /* RDMA */
3328                 resp->wlist = args->wlist;
3329                 resp->wlist_len = resp->data_len;
3330                 *cs->statusp = resp->status = NFS4_OK;
3331                 if (resp->wlist)
3332                         clist_zero_len(resp->wlist);
3333                 goto out;
3334         }
3335 
3336         if (args->count == 0) {
3337                 *cs->statusp = resp->status = NFS4_OK;
3338                 resp->eof = FALSE;
3339                 resp->data_len = 0;
3340                 resp->data_val = NULL;
3341                 resp->mblk = NULL;
3342                 /* RDMA */
3343                 resp->wlist = args->wlist;
3344                 resp->wlist_len = resp->data_len;
3345                 if (resp->wlist)
3346                         clist_zero_len(resp->wlist);
3347                 goto out;
3348         }
3349 
3350         /*
3351          * Do not allocate memory more than maximum allowed
3352          * transfer size
3353          */
3354         if (args->count > rfs4_tsize(req))
3355                 args->count = rfs4_tsize(req);
3356 
3357         if (loaned_buffers) {
3358                 uiop = (uio_t *)rfs_setup_xuio(vp);
3359                 ASSERT(uiop != NULL);
3360                 uiop->uio_segflg = UIO_SYSSPACE;
3361                 uiop->uio_loffset = args->offset;
3362                 uiop->uio_resid = args->count;
3363 
3364                 /* Jump to do the read if successful */
3365                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3366                         /*
3367                          * Need to hold the vnode until after VOP_RETZCBUF()
3368                          * is called.
3369                          */
3370                         VN_HOLD(vp);
3371                         goto doio_read;
3372                 }
3373 
3374                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3375                     uiop->uio_loffset, int, uiop->uio_resid);
3376 
3377                 uiop->uio_extflg = 0;
3378 
3379                 /* failure to setup for zero copy */
3380                 rfs_free_xuio((void *)uiop);
3381                 loaned_buffers = 0;
3382         }
3383 
3384         /*
3385          * If returning data via RDMA Write, then grab the chunk list. If we
3386          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3387          */
3388         if (rdma_used) {
3389                 mp = NULL;
3390                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3391                 uio.uio_iov = &iov;
3392                 uio.uio_iovcnt = 1;
3393         } else {
3394                 /*
3395                  * mp will contain the data to be sent out in the read reply.
3396                  * It will be freed after the reply has been sent.
3397                  */
3398                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3399                 ASSERT(mp != NULL);
3400                 ASSERT(alloc_err == 0);
3401                 uio.uio_iov = iovp;
3402                 uio.uio_iovcnt = iovcnt;
3403         }
3404 
3405         uio.uio_segflg = UIO_SYSSPACE;
3406         uio.uio_extflg = UIO_COPY_CACHED;
3407         uio.uio_loffset = args->offset;
3408         uio.uio_resid = args->count;
3409         uiop = &uio;
3410 
3411 doio_read:
3412         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3413 
3414         va.va_mask = AT_SIZE;
3415         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3416 
3417         if (error) {
3418                 if (mp)
3419                         freemsg(mp);
3420                 *cs->statusp = resp->status = puterrno4(error);
3421                 goto out;
3422         }
3423 
3424         /* make mblk using zc buffers */
3425         if (loaned_buffers) {
3426                 mp = uio_to_mblk(uiop);
3427                 ASSERT(mp != NULL);
3428         }
3429 
3430         *cs->statusp = resp->status = NFS4_OK;
3431 
3432         ASSERT(uiop->uio_resid >= 0);
3433         resp->data_len = args->count - uiop->uio_resid;
3434         if (mp) {
3435                 resp->data_val = (char *)mp->b_datap->db_base;
3436                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3437         } else {
3438                 resp->data_val = (caddr_t)iov.iov_base;
3439         }
3440 
3441         resp->mblk = mp;
3442 
3443         if (!verror && offset + resp->data_len == va.va_size)
3444                 resp->eof = TRUE;
3445         else
3446                 resp->eof = FALSE;
3447 
3448         if (rdma_used) {
3449                 if (!rdma_setup_read_data4(args, resp)) {
3450                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3451                 }
3452         } else {
3453                 resp->wlist = NULL;
3454         }
3455 
3456 out:
3457         if (in_crit)
3458                 nbl_end_crit(vp);
3459 
3460         if (iovp != NULL)
3461                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3462 
3463         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3464             READ4res *, resp);
3465 }
3466 
3467 static void
3468 rfs4_op_read_free(nfs_resop4 *resop)
3469 {
3470         READ4res        *resp = &resop->nfs_resop4_u.opread;
3471 
3472         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3473                 freemsg(resp->mblk);
3474                 resp->mblk = NULL;
3475                 resp->data_val = NULL;
3476                 resp->data_len = 0;
3477         }
3478 }
3479 
3480 static void
3481 rfs4_op_readdir_free(nfs_resop4 * resop)
3482 {
3483         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3484 
3485         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3486                 freeb(resp->mblk);
3487                 resp->mblk = NULL;
3488                 resp->data_len = 0;
3489         }
3490 }
3491 
3492 
3493 /* ARGSUSED */
3494 static void
3495 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3496     struct compound_state *cs)
3497 {
3498         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3499         int             error;
3500         vnode_t         *vp;
3501         struct exportinfo *exi, *sav_exi;
3502         nfs_fh4_fmt_t   *fh_fmtp;
3503         nfs_export_t *ne = nfs_get_export();
3504 
3505         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3506 
3507         if (cs->vp) {
3508                 VN_RELE(cs->vp);
3509                 cs->vp = NULL;
3510         }
3511 
3512         if (cs->cr)
3513                 crfree(cs->cr);
3514 
3515         cs->cr = crdup(cs->basecr);
3516 
3517         vp = ne->exi_public->exi_vp;
3518         if (vp == NULL) {
3519                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3520                 goto out;
3521         }
3522 
3523         error = makefh4(&cs->fh, vp, ne->exi_public);
3524         if (error != 0) {
3525                 *cs->statusp = resp->status = puterrno4(error);
3526                 goto out;
3527         }
3528         sav_exi = cs->exi;
3529         if (ne->exi_public == ne->exi_root) {
3530                 /*
3531                  * No filesystem is actually shared public, so we default
3532                  * to exi_root. In this case, we must check whether root
3533                  * is exported.
3534                  */
3535                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3536 
3537                 /*
3538                  * if root filesystem is exported, the exportinfo struct that we
3539                  * should use is what checkexport4 returns, because root_exi is
3540                  * actually a mostly empty struct.
3541                  */
3542                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3543                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3544                 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3545         } else {
3546                 /*
3547                  * it's a properly shared filesystem
3548                  */
3549                 cs->exi = ne->exi_public;
3550         }
3551 
3552         if (is_system_labeled()) {
3553                 bslabel_t *clabel;
3554 
3555                 ASSERT(req->rq_label != NULL);
3556                 clabel = req->rq_label;
3557                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3558                     "got client label from request(1)",
3559                     struct svc_req *, req);
3560                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3561                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3562                             cs->exi)) {
3563                                 *cs->statusp = resp->status =
3564                                     NFS4ERR_SERVERFAULT;
3565                                 goto out;
3566                         }
3567                 }
3568         }
3569 
3570         VN_HOLD(vp);
3571         cs->vp = vp;
3572 
3573         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3574                 VN_RELE(cs->vp);
3575                 cs->vp = NULL;
3576                 cs->exi = sav_exi;
3577                 goto out;
3578         }
3579 
3580         *cs->statusp = resp->status = NFS4_OK;
3581 out:
3582         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3583             PUTPUBFH4res *, resp);
3584 }
3585 
3586 /*
3587  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3588  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3589  * or joe have restrictive search permissions, then we shouldn't let
3590  * the client get a file handle. This is easy to enforce. However, we
3591  * don't know what security flavor should be used until we resolve the
3592  * path name. Another complication is uid mapping. If root is
3593  * the user, then it will be mapped to the anonymous user by default,
3594  * but we won't know that till we've resolved the path name. And we won't
3595  * know what the anonymous user is.
3596  * Luckily, SECINFO is specified to take a full filename.
3597  * So what we will have to in rfs4_op_lookup is check that flavor of
3598  * the target object matches that of the request, and if root was the
3599  * caller, check for the root= and anon= options, and if necessary,
3600  * repeat the lookup using the right cred_t. But that's not done yet.
3601  */
3602 /* ARGSUSED */
3603 static void
3604 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3605     struct compound_state *cs)
3606 {
3607         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3608         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3609         nfs_fh4_fmt_t *fh_fmtp;
3610 
3611         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3612             PUTFH4args *, args);
3613 
3614         if (cs->vp) {
3615                 VN_RELE(cs->vp);
3616                 cs->vp = NULL;
3617         }
3618 
3619         if (cs->cr) {
3620                 crfree(cs->cr);
3621                 cs->cr = NULL;
3622         }
3623 
3624         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3625                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3626                 goto out;
3627         }
3628 
3629         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3630         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3631             NULL);
3632 
3633         if (cs->exi == NULL) {
3634                 *cs->statusp = resp->status = NFS4ERR_STALE;
3635                 goto out;
3636         }
3637 
3638         cs->cr = crdup(cs->basecr);
3639 
3640         ASSERT(cs->cr != NULL);
3641 
3642         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3643                 *cs->statusp = resp->status;
3644                 goto out;
3645         }
3646 
3647         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3648                 VN_RELE(cs->vp);
3649                 cs->vp = NULL;
3650                 goto out;
3651         }
3652 
3653         nfs_fh4_copy(&args->object, &cs->fh);
3654         *cs->statusp = resp->status = NFS4_OK;
3655         cs->deleg = FALSE;
3656 
3657 out:
3658         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3659             PUTFH4res *, resp);
3660 }
3661 
3662 /* ARGSUSED */
3663 static void
3664 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3665     struct compound_state *cs)
3666 {
3667         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3668         int error;
3669         fid_t fid;
3670         struct exportinfo *exi, *sav_exi;
3671 
3672         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3673 
3674         if (cs->vp) {
3675                 VN_RELE(cs->vp);
3676                 cs->vp = NULL;
3677         }
3678 
3679         if (cs->cr)
3680                 crfree(cs->cr);
3681 
3682         cs->cr = crdup(cs->basecr);
3683 
3684         /*
3685          * Using rootdir, the system root vnode,
3686          * get its fid.
3687          */
3688         bzero(&fid, sizeof (fid));
3689         fid.fid_len = MAXFIDSZ;
3690         error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3691         if (error != 0) {
3692                 *cs->statusp = resp->status = puterrno4(error);
3693                 goto out;
3694         }
3695 
3696         /*
3697          * Then use the root fsid & fid it to find out if it's exported
3698          *
3699          * If the server root isn't exported directly, then
3700          * it should at least be a pseudo export based on
3701          * one or more exports further down in the server's
3702          * file tree.
3703          */
3704         exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3705         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3706                 NFS4_DEBUG(rfs4_debug,
3707                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3708                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3709                 goto out;
3710         }
3711 
3712         /*
3713          * Now make a filehandle based on the root
3714          * export and root vnode.
3715          */
3716         error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3717         if (error != 0) {
3718                 *cs->statusp = resp->status = puterrno4(error);
3719                 goto out;
3720         }
3721 
3722         sav_exi = cs->exi;
3723         cs->exi = exi;
3724 
3725         VN_HOLD(ZONE_ROOTVP());
3726         cs->vp = ZONE_ROOTVP();
3727 
3728         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3729                 VN_RELE(cs->vp);
3730                 cs->vp = NULL;
3731                 cs->exi = sav_exi;
3732                 goto out;
3733         }
3734 
3735         *cs->statusp = resp->status = NFS4_OK;
3736         cs->deleg = FALSE;
3737 out:
3738         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3739             PUTROOTFH4res *, resp);
3740 }
3741 
3742 /*
3743  * set_rdattr_params sets up the variables used to manage what information
3744  * to get for each directory entry.
3745  */
3746 static nfsstat4
3747 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3748     bitmap4 attrs, bool_t *need_to_lookup)
3749 {
3750         uint_t  va_mask;
3751         nfsstat4 status;
3752         bitmap4 objbits;
3753 
3754         status = bitmap4_to_attrmask(attrs, sargp);
3755         if (status != NFS4_OK) {
3756                 /*
3757                  * could not even figure attr mask
3758                  */
3759                 return (status);
3760         }
3761         va_mask = sargp->vap->va_mask;
3762 
3763         /*
3764          * dirent's d_ino is always correct value for mounted_on_fileid.
3765          * mntdfid_set is set once here, but mounted_on_fileid is
3766          * set in main dirent processing loop for each dirent.
3767          * The mntdfid_set is a simple optimization that lets the
3768          * server attr code avoid work when caller is readdir.
3769          */
3770         sargp->mntdfid_set = TRUE;
3771 
3772         /*
3773          * Lookup entry only if client asked for any of the following:
3774          * a) vattr attrs
3775          * b) vfs attrs
3776          * c) attrs w/per-object scope requested (change, filehandle, etc)
3777          *    other than mounted_on_fileid (which we can take from dirent)
3778          */
3779         objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3780 
3781         if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3782                 *need_to_lookup = TRUE;
3783         else
3784                 *need_to_lookup = FALSE;
3785 
3786         if (sargp->sbp == NULL)
3787                 return (NFS4_OK);
3788 
3789         /*
3790          * If filesystem attrs are requested, get them now from the
3791          * directory vp, as most entries will have same filesystem. The only
3792          * exception are mounted over entries but we handle
3793          * those as we go (XXX mounted over detection not yet implemented).
3794          */
3795         sargp->vap->va_mask = 0;  /* to avoid VOP_GETATTR */
3796         status = bitmap4_get_sysattrs(sargp);
3797         sargp->vap->va_mask = va_mask;
3798 
3799         if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3800                 /*
3801                  * Failed to get filesystem attributes.
3802                  * Return a rdattr_error for each entry, but don't fail.
3803                  * However, don't get any obj-dependent attrs.
3804                  */
3805                 sargp->rdattr_error = status;        /* for rdattr_error */
3806                 *need_to_lookup = FALSE;
3807                 /*
3808                  * At least get fileid for regular readdir output
3809                  */
3810                 sargp->vap->va_mask &= AT_NODEID;
3811                 status = NFS4_OK;
3812         }
3813 
3814         return (status);
3815 }
3816 
3817 /*
3818  * readlink: args: CURRENT_FH.
3819  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3820  */
3821 
3822 /* ARGSUSED */
3823 static void
3824 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3825     struct compound_state *cs)
3826 {
3827         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3828         int error;
3829         vnode_t *vp;
3830         struct iovec iov;
3831         struct vattr va;
3832         struct uio uio;
3833         char *data;
3834         struct sockaddr *ca;
3835         char *name = NULL;
3836         int is_referral;
3837 
3838         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3839 
3840         /* CURRENT_FH: directory */
3841         vp = cs->vp;
3842         if (vp == NULL) {
3843                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3844                 goto out;
3845         }
3846 
3847         if (cs->access == CS_ACCESS_DENIED) {
3848                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3849                 goto out;
3850         }
3851 
3852         /* Is it a referral? */
3853         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3854 
3855                 is_referral = 1;
3856 
3857         } else {
3858 
3859                 is_referral = 0;
3860 
3861                 if (vp->v_type == VDIR) {
3862                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3863                         goto out;
3864                 }
3865 
3866                 if (vp->v_type != VLNK) {
3867                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3868                         goto out;
3869                 }
3870 
3871         }
3872 
3873         va.va_mask = AT_MODE;
3874         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3875         if (error) {
3876                 *cs->statusp = resp->status = puterrno4(error);
3877                 goto out;
3878         }
3879 
3880         if (MANDLOCK(vp, va.va_mode)) {
3881                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3882                 goto out;
3883         }
3884 
3885         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3886 
3887         if (is_referral) {
3888                 char *s;
3889                 size_t strsz;
3890 
3891                 /* Get an artificial symlink based on a referral */
3892                 s = build_symlink(vp, cs->cr, &strsz);
3893                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3894                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3895                     vnode_t *, vp, char *, s);
3896                 if (s == NULL)
3897                         error = EINVAL;
3898                 else {
3899                         error = 0;
3900                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3901                         kmem_free(s, strsz);
3902                 }
3903 
3904         } else {
3905 
3906                 iov.iov_base = data;
3907                 iov.iov_len = MAXPATHLEN;
3908                 uio.uio_iov = &iov;
3909                 uio.uio_iovcnt = 1;
3910                 uio.uio_segflg = UIO_SYSSPACE;
3911                 uio.uio_extflg = UIO_COPY_CACHED;
3912                 uio.uio_loffset = 0;
3913                 uio.uio_resid = MAXPATHLEN;
3914 
3915                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3916 
3917                 if (!error)
3918                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3919         }
3920 
3921         if (error) {
3922                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3923                 *cs->statusp = resp->status = puterrno4(error);
3924                 goto out;
3925         }
3926 
3927         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3928         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3929             MAXPATHLEN  + 1);
3930 
3931         if (name == NULL) {
3932                 /*
3933                  * Even though the conversion failed, we return
3934                  * something. We just don't translate it.
3935                  */
3936                 name = data;
3937         }
3938 
3939         /*
3940          * treat link name as data
3941          */
3942         (void) str_to_utf8(name, (utf8string *)&resp->link);
3943 
3944         if (name != data)
3945                 kmem_free(name, MAXPATHLEN + 1);
3946         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3947         *cs->statusp = resp->status = NFS4_OK;
3948 
3949 out:
3950         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3951             READLINK4res *, resp);
3952 }
3953 
3954 static void
3955 rfs4_op_readlink_free(nfs_resop4 *resop)
3956 {
3957         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3958         utf8string *symlink = (utf8string *)&resp->link;
3959 
3960         if (symlink->utf8string_val) {
3961                 UTF8STRING_FREE(*symlink)
3962         }
3963 }
3964 
3965 /*
3966  * release_lockowner:
3967  *      Release any state associated with the supplied
3968  *      lockowner. Note if any lo_state is holding locks we will not
3969  *      rele that lo_state and thus the lockowner will not be destroyed.
3970  *      A client using lock after the lock owner stateid has been released
3971  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3972  *      to reissue the lock with new_lock_owner set to TRUE.
3973  *      args: lock_owner
3974  *      res:  status
3975  */
3976 /* ARGSUSED */
3977 static void
3978 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3979     struct svc_req *req, struct compound_state *cs)
3980 {
3981         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3982         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3983         rfs4_lockowner_t *lo;
3984         rfs4_openowner_t *oo;
3985         rfs4_state_t *sp;
3986         rfs4_lo_state_t *lsp;
3987         rfs4_client_t *cp;
3988         bool_t create = FALSE;
3989         locklist_t *llist;
3990         sysid_t sysid;
3991 
3992         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3993             cs, RELEASE_LOCKOWNER4args *, ap);
3994 
3995         /* Make sure there is a clientid around for this request */
3996         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3997 
3998         if (cp == NULL) {
3999                 *cs->statusp = resp->status =
4000                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
4001                 goto out;
4002         }
4003         rfs4_client_rele(cp);
4004 
4005         lo = rfs4_findlockowner(&ap->lock_owner, &create);
4006         if (lo == NULL) {
4007                 *cs->statusp = resp->status = NFS4_OK;
4008                 goto out;
4009         }
4010         ASSERT(lo->rl_client != NULL);
4011 
4012         /*
4013          * Check for EXPIRED client. If so will reap state with in a lease
4014          * period or on next set_clientid_confirm step
4015          */
4016         if (rfs4_lease_expired(lo->rl_client)) {
4017                 rfs4_lockowner_rele(lo);
4018                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4019                 goto out;
4020         }
4021 
4022         /*
4023          * If no sysid has been assigned, then no locks exist; just return.
4024          */
4025         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4026         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
4027                 rfs4_lockowner_rele(lo);
4028                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4029                 goto out;
4030         }
4031 
4032         sysid = lo->rl_client->rc_sysidt;
4033         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4034 
4035         /*
4036          * Mark the lockowner invalid.
4037          */
4038         rfs4_dbe_hide(lo->rl_dbe);
4039 
4040         /*
4041          * sysid-pid pair should now not be used since the lockowner is
4042          * invalid. If the client were to instantiate the lockowner again
4043          * it would be assigned a new pid. Thus we can get the list of
4044          * current locks.
4045          */
4046 
4047         llist = flk_get_active_locks(sysid, lo->rl_pid);
4048         /* If we are still holding locks fail */
4049         if (llist != NULL) {
4050 
4051                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4052 
4053                 flk_free_locklist(llist);
4054                 /*
4055                  * We need to unhide the lockowner so the client can
4056                  * try it again. The bad thing here is if the client
4057                  * has a logic error that took it here in the first place
4058                  * they probably have lost accounting of the locks that it
4059                  * is holding. So we may have dangling state until the
4060                  * open owner state is reaped via close. One scenario
4061                  * that could possibly occur is that the client has
4062                  * sent the unlock request(s) in separate threads
4063                  * and has not waited for the replies before sending the
4064                  * RELEASE_LOCKOWNER request. Presumably, it would expect
4065                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4066                  * reissuing the request.
4067                  */
4068                 rfs4_dbe_unhide(lo->rl_dbe);
4069                 rfs4_lockowner_rele(lo);
4070                 goto out;
4071         }
4072 
4073         /*
4074          * For the corresponding client we need to check each open
4075          * owner for any opens that have lockowner state associated
4076          * with this lockowner.
4077          */
4078 
4079         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4080         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4081             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4082 
4083                 rfs4_dbe_lock(oo->ro_dbe);
4084                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4085                     sp = list_next(&oo->ro_statelist, sp)) {
4086 
4087                         rfs4_dbe_lock(sp->rs_dbe);
4088                         for (lsp = list_head(&sp->rs_lostatelist);
4089                             lsp != NULL;
4090                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4091                                 if (lsp->rls_locker == lo) {
4092                                         rfs4_dbe_lock(lsp->rls_dbe);
4093                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4094                                         rfs4_dbe_unlock(lsp->rls_dbe);
4095                                 }
4096                         }
4097                         rfs4_dbe_unlock(sp->rs_dbe);
4098                 }
4099                 rfs4_dbe_unlock(oo->ro_dbe);
4100         }
4101         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4102 
4103         rfs4_lockowner_rele(lo);
4104 
4105         *cs->statusp = resp->status = NFS4_OK;
4106 
4107 out:
4108         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4109             cs, RELEASE_LOCKOWNER4res *, resp);
4110 }
4111 
4112 /*
4113  * short utility function to lookup a file and recall the delegation
4114  */
4115 static rfs4_file_t *
4116 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4117     int *lkup_error, cred_t *cr)
4118 {
4119         vnode_t *vp;
4120         rfs4_file_t *fp = NULL;
4121         bool_t fcreate = FALSE;
4122         int error;
4123 
4124         if (vpp)
4125                 *vpp = NULL;
4126 
4127         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4128             NULL)) == 0) {
4129                 if (vp->v_type == VREG)
4130                         fp = rfs4_findfile(vp, NULL, &fcreate);
4131                 if (vpp)
4132                         *vpp = vp;
4133                 else
4134                         VN_RELE(vp);
4135         }
4136 
4137         if (lkup_error)
4138                 *lkup_error = error;
4139 
4140         return (fp);
4141 }
4142 
4143 /*
4144  * remove: args: CURRENT_FH: directory; name.
4145  *      res: status. If success - CURRENT_FH unchanged, return change_info
4146  *              for directory.
4147  */
4148 /* ARGSUSED */
4149 static void
4150 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4151     struct compound_state *cs)
4152 {
4153         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4154         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4155         int error;
4156         vnode_t *dvp, *vp;
4157         struct vattr bdva, idva, adva;
4158         char *nm;
4159         uint_t len;
4160         rfs4_file_t *fp;
4161         int in_crit = 0;
4162         bslabel_t *clabel;
4163         struct sockaddr *ca;
4164         char *name = NULL;
4165         nfsstat4 status;
4166 
4167         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4168             REMOVE4args *, args);
4169 
4170         /* CURRENT_FH: directory */
4171         dvp = cs->vp;
4172         if (dvp == NULL) {
4173                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4174                 goto out;
4175         }
4176 
4177         if (cs->access == CS_ACCESS_DENIED) {
4178                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4179                 goto out;
4180         }
4181 
4182         /*
4183          * If there is an unshared filesystem mounted on this vnode,
4184          * Do not allow to remove anything in this directory.
4185          */
4186         if (vn_ismntpt(dvp)) {
4187                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4188                 goto out;
4189         }
4190 
4191         if (dvp->v_type != VDIR) {
4192                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4193                 goto out;
4194         }
4195 
4196         status = utf8_dir_verify(&args->target);
4197         if (status != NFS4_OK) {
4198                 *cs->statusp = resp->status = status;
4199                 goto out;
4200         }
4201 
4202         /*
4203          * Lookup the file so that we can check if it's a directory
4204          */
4205         nm = utf8_to_fn(&args->target, &len, NULL);
4206         if (nm == NULL) {
4207                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4208                 goto out;
4209         }
4210 
4211         if (len > MAXNAMELEN) {
4212                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4213                 kmem_free(nm, len);
4214                 goto out;
4215         }
4216 
4217         if (rdonly4(req, cs)) {
4218                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4219                 kmem_free(nm, len);
4220                 goto out;
4221         }
4222 
4223         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4224         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4225             MAXPATHLEN  + 1);
4226 
4227         if (name == NULL) {
4228                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4229                 kmem_free(nm, len);
4230                 goto out;
4231         }
4232 
4233         /*
4234          * Lookup the file to determine type and while we are see if
4235          * there is a file struct around and check for delegation.
4236          * We don't need to acquire va_seq before this lookup, if
4237          * it causes an update, cinfo.before will not match, which will
4238          * trigger a cache flush even if atomic is TRUE.
4239          */
4240         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4241                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4242                     NULL)) {
4243                         VN_RELE(vp);
4244                         rfs4_file_rele(fp);
4245                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4246                         if (nm != name)
4247                                 kmem_free(name, MAXPATHLEN + 1);
4248                         kmem_free(nm, len);
4249                         goto out;
4250                 }
4251         }
4252 
4253         /* Didn't find anything to remove */
4254         if (vp == NULL) {
4255                 *cs->statusp = resp->status = error;
4256                 if (nm != name)
4257                         kmem_free(name, MAXPATHLEN + 1);
4258                 kmem_free(nm, len);
4259                 goto out;
4260         }
4261 
4262         if (nbl_need_check(vp)) {
4263                 nbl_start_crit(vp, RW_READER);
4264                 in_crit = 1;
4265                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4266                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4267                         if (nm != name)
4268                                 kmem_free(name, MAXPATHLEN + 1);
4269                         kmem_free(nm, len);
4270                         nbl_end_crit(vp);
4271                         VN_RELE(vp);
4272                         if (fp) {
4273                                 rfs4_clear_dont_grant(fp);
4274                                 rfs4_file_rele(fp);
4275                         }
4276                         goto out;
4277                 }
4278         }
4279 
4280         /* check label before allowing removal */
4281         if (is_system_labeled()) {
4282                 ASSERT(req->rq_label != NULL);
4283                 clabel = req->rq_label;
4284                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4285                     "got client label from request(1)",
4286                     struct svc_req *, req);
4287                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4288                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4289                             cs->exi)) {
4290                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4291                                 if (name != nm)
4292                                         kmem_free(name, MAXPATHLEN + 1);
4293                                 kmem_free(nm, len);
4294                                 if (in_crit)
4295                                         nbl_end_crit(vp);
4296                                 VN_RELE(vp);
4297                                 if (fp) {
4298                                         rfs4_clear_dont_grant(fp);
4299                                         rfs4_file_rele(fp);
4300                                 }
4301                                 goto out;
4302                         }
4303                 }
4304         }
4305 
4306         /* Get dir "before" change value */
4307         bdva.va_mask = AT_CTIME|AT_SEQ;
4308         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4309         if (error) {
4310                 *cs->statusp = resp->status = puterrno4(error);
4311                 if (nm != name)
4312                         kmem_free(name, MAXPATHLEN + 1);
4313                 kmem_free(nm, len);
4314                 if (in_crit)
4315                         nbl_end_crit(vp);
4316                 VN_RELE(vp);
4317                 if (fp) {
4318                         rfs4_clear_dont_grant(fp);
4319                         rfs4_file_rele(fp);
4320                 }
4321                 goto out;
4322         }
4323         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4324 
4325         /* Actually do the REMOVE operation */
4326         if (vp->v_type == VDIR) {
4327                 /*
4328                  * Can't remove a directory that has a mounted-on filesystem.
4329                  */
4330                 if (vn_ismntpt(vp)) {
4331                         error = EACCES;
4332                 } else {
4333                         /*
4334                          * System V defines rmdir to return EEXIST,
4335                          * not ENOTEMPTY, if the directory is not
4336                          * empty.  A System V NFS server needs to map
4337                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4338                          * transmit over the wire.
4339                          */
4340                         if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4341                             NULL, 0)) == EEXIST)
4342                                 error = ENOTEMPTY;
4343                 }
4344         } else {
4345                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4346                     fp != NULL) {
4347                         struct vattr va;
4348                         vnode_t *tvp;
4349 
4350                         rfs4_dbe_lock(fp->rf_dbe);
4351                         tvp = fp->rf_vp;
4352                         if (tvp)
4353                                 VN_HOLD(tvp);
4354                         rfs4_dbe_unlock(fp->rf_dbe);
4355 
4356                         if (tvp) {
4357                                 /*
4358                                  * This is va_seq safe because we are not
4359                                  * manipulating dvp.
4360                                  */
4361                                 va.va_mask = AT_NLINK;
4362                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4363                                     va.va_nlink == 0) {
4364                                         /* Remove state on file remove */
4365                                         if (in_crit) {
4366                                                 nbl_end_crit(vp);
4367                                                 in_crit = 0;
4368                                         }
4369                                         rfs4_close_all_state(fp);
4370                                 }
4371                                 VN_RELE(tvp);
4372                         }
4373                 }
4374         }
4375 
4376         if (in_crit)
4377                 nbl_end_crit(vp);
4378         VN_RELE(vp);
4379 
4380         if (fp) {
4381                 rfs4_clear_dont_grant(fp);
4382                 rfs4_file_rele(fp);
4383         }
4384         if (nm != name)
4385                 kmem_free(name, MAXPATHLEN + 1);
4386         kmem_free(nm, len);
4387 
4388         if (error) {
4389                 *cs->statusp = resp->status = puterrno4(error);
4390                 goto out;
4391         }
4392 
4393         /*
4394          * Get the initial "after" sequence number, if it fails, set to zero
4395          */
4396         idva.va_mask = AT_SEQ;
4397         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4398                 idva.va_seq = 0;
4399 
4400         /*
4401          * Force modified data and metadata out to stable storage.
4402          */
4403         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4404 
4405         /*
4406          * Get "after" change value, if it fails, simply return the
4407          * before value.
4408          */
4409         adva.va_mask = AT_CTIME|AT_SEQ;
4410         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4411                 adva.va_ctime = bdva.va_ctime;
4412                 adva.va_seq = 0;
4413         }
4414 
4415         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4416 
4417         /*
4418          * The cinfo.atomic = TRUE only if we have
4419          * non-zero va_seq's, and it has incremented by exactly one
4420          * during the VOP_REMOVE/RMDIR and it didn't change during
4421          * the VOP_FSYNC.
4422          */
4423         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4424             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4425                 resp->cinfo.atomic = TRUE;
4426         else
4427                 resp->cinfo.atomic = FALSE;
4428 
4429         *cs->statusp = resp->status = NFS4_OK;
4430 
4431 out:
4432         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4433             REMOVE4res *, resp);
4434 }
4435 
4436 /*
4437  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4438  *              oldname and newname.
4439  *      res: status. If success - CURRENT_FH unchanged, return change_info
4440  *              for both from and target directories.
4441  */
4442 /* ARGSUSED */
4443 static void
4444 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4445     struct compound_state *cs)
4446 {
4447         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4448         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4449         int error;
4450         vnode_t *odvp;
4451         vnode_t *ndvp;
4452         vnode_t *srcvp, *targvp, *tvp;
4453         struct vattr obdva, oidva, oadva;
4454         struct vattr nbdva, nidva, nadva;
4455         char *onm, *nnm;
4456         uint_t olen, nlen;
4457         rfs4_file_t *fp, *sfp;
4458         int in_crit_src, in_crit_targ;
4459         int fp_rele_grant_hold, sfp_rele_grant_hold;
4460         int unlinked;
4461         bslabel_t *clabel;
4462         struct sockaddr *ca;
4463         char *converted_onm = NULL;
4464         char *converted_nnm = NULL;
4465         nfsstat4 status;
4466 
4467         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4468             RENAME4args *, args);
4469 
4470         fp = sfp = NULL;
4471         srcvp = targvp = tvp = NULL;
4472         in_crit_src = in_crit_targ = 0;
4473         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4474         unlinked = 0;
4475 
4476         /* CURRENT_FH: target directory */
4477         ndvp = cs->vp;
4478         if (ndvp == NULL) {
4479                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4480                 goto out;
4481         }
4482 
4483         /* SAVED_FH: from directory */
4484         odvp = cs->saved_vp;
4485         if (odvp == NULL) {
4486                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4487                 goto out;
4488         }
4489 
4490         if (cs->access == CS_ACCESS_DENIED) {
4491                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4492                 goto out;
4493         }
4494 
4495         /*
4496          * If there is an unshared filesystem mounted on this vnode,
4497          * do not allow to rename objects in this directory.
4498          */
4499         if (vn_ismntpt(odvp)) {
4500                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4501                 goto out;
4502         }
4503 
4504         /*
4505          * If there is an unshared filesystem mounted on this vnode,
4506          * do not allow to rename to this directory.
4507          */
4508         if (vn_ismntpt(ndvp)) {
4509                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4510                 goto out;
4511         }
4512 
4513         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4514                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4515                 goto out;
4516         }
4517 
4518         if (cs->saved_exi != cs->exi) {
4519                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4520                 goto out;
4521         }
4522 
4523         status = utf8_dir_verify(&args->oldname);
4524         if (status != NFS4_OK) {
4525                 *cs->statusp = resp->status = status;
4526                 goto out;
4527         }
4528 
4529         status = utf8_dir_verify(&args->newname);
4530         if (status != NFS4_OK) {
4531                 *cs->statusp = resp->status = status;
4532                 goto out;
4533         }
4534 
4535         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4536         if (onm == NULL) {
4537                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4538                 goto out;
4539         }
4540         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4541         nlen = MAXPATHLEN + 1;
4542         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4543             nlen);
4544 
4545         if (converted_onm == NULL) {
4546                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4547                 kmem_free(onm, olen);
4548                 goto out;
4549         }
4550 
4551         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4552         if (nnm == NULL) {
4553                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4554                 if (onm != converted_onm)
4555                         kmem_free(converted_onm, MAXPATHLEN + 1);
4556                 kmem_free(onm, olen);
4557                 goto out;
4558         }
4559         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4560             MAXPATHLEN  + 1);
4561 
4562         if (converted_nnm == NULL) {
4563                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4564                 kmem_free(nnm, nlen);
4565                 nnm = NULL;
4566                 if (onm != converted_onm)
4567                         kmem_free(converted_onm, MAXPATHLEN + 1);
4568                 kmem_free(onm, olen);
4569                 goto out;
4570         }
4571 
4572 
4573         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4574                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4575                 kmem_free(onm, olen);
4576                 kmem_free(nnm, nlen);
4577                 goto out;
4578         }
4579 
4580 
4581         if (rdonly4(req, cs)) {
4582                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4583                 if (onm != converted_onm)
4584                         kmem_free(converted_onm, MAXPATHLEN + 1);
4585                 kmem_free(onm, olen);
4586                 if (nnm != converted_nnm)
4587                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4588                 kmem_free(nnm, nlen);
4589                 goto out;
4590         }
4591 
4592         /* check label of the target dir */
4593         if (is_system_labeled()) {
4594                 ASSERT(req->rq_label != NULL);
4595                 clabel = req->rq_label;
4596                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4597                     "got client label from request(1)",
4598                     struct svc_req *, req);
4599                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4600                         if (!do_rfs_label_check(clabel, ndvp,
4601                             EQUALITY_CHECK, cs->exi)) {
4602                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4603                                 goto err_out;
4604                         }
4605                 }
4606         }
4607 
4608         /*
4609          * Is the source a file and have a delegation?
4610          * We don't need to acquire va_seq before these lookups, if
4611          * it causes an update, cinfo.before will not match, which will
4612          * trigger a cache flush even if atomic is TRUE.
4613          */
4614         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4615             &error, cs->cr)) {
4616                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4617                     NULL)) {
4618                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4619                         goto err_out;
4620                 }
4621         }
4622 
4623         if (srcvp == NULL) {
4624                 *cs->statusp = resp->status = puterrno4(error);
4625                 if (onm != converted_onm)
4626                         kmem_free(converted_onm, MAXPATHLEN + 1);
4627                 kmem_free(onm, olen);
4628                 if (nnm != converted_nnm)
4629                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4630                 kmem_free(nnm, nlen);
4631                 goto out;
4632         }
4633 
4634         sfp_rele_grant_hold = 1;
4635 
4636         /* Does the destination exist and a file and have a delegation? */
4637         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4638             NULL, cs->cr)) {
4639                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4640                     NULL)) {
4641                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4642                         goto err_out;
4643                 }
4644         }
4645         fp_rele_grant_hold = 1;
4646 
4647         /* Check for NBMAND lock on both source and target */
4648         if (nbl_need_check(srcvp)) {
4649                 nbl_start_crit(srcvp, RW_READER);
4650                 in_crit_src = 1;
4651                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4652                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4653                         goto err_out;
4654                 }
4655         }
4656 
4657         if (targvp && nbl_need_check(targvp)) {
4658                 nbl_start_crit(targvp, RW_READER);
4659                 in_crit_targ = 1;
4660                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4661                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4662                         goto err_out;
4663                 }
4664         }
4665 
4666         /* Get source "before" change value */
4667         obdva.va_mask = AT_CTIME|AT_SEQ;
4668         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4669         if (!error) {
4670                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4671                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4672         }
4673         if (error) {
4674                 *cs->statusp = resp->status = puterrno4(error);
4675                 goto err_out;
4676         }
4677 
4678         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4679         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4680 
4681         error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4682             NULL, 0);
4683 
4684         /*
4685          * If target existed and was unlinked by VOP_RENAME, state will need
4686          * closed. To avoid deadlock, rfs4_close_all_state will be done after
4687          * any necessary nbl_end_crit on srcvp and tgtvp.
4688          */
4689         if (error == 0 && fp != NULL) {
4690                 rfs4_dbe_lock(fp->rf_dbe);
4691                 tvp = fp->rf_vp;
4692                 if (tvp)
4693                         VN_HOLD(tvp);
4694                 rfs4_dbe_unlock(fp->rf_dbe);
4695 
4696                 if (tvp) {
4697                         struct vattr va;
4698                         va.va_mask = AT_NLINK;
4699 
4700                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4701                             va.va_nlink == 0) {
4702                                 unlinked = 1;
4703 
4704                                 /* DEBUG data */
4705                                 if ((srcvp == targvp) || (tvp != targvp)) {
4706                                         cmn_err(CE_WARN, "rfs4_op_rename: "
4707                                             "srcvp %p, targvp: %p, tvp: %p",
4708                                             (void *)srcvp, (void *)targvp,
4709                                             (void *)tvp);
4710                                 }
4711                         } else {
4712                                 VN_RELE(tvp);
4713                         }
4714                 }
4715         }
4716         if (error == 0)
4717                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4718 
4719         if (in_crit_src)
4720                 nbl_end_crit(srcvp);
4721         if (srcvp)
4722                 VN_RELE(srcvp);
4723         if (in_crit_targ)
4724                 nbl_end_crit(targvp);
4725         if (targvp)
4726                 VN_RELE(targvp);
4727 
4728         if (unlinked) {
4729                 ASSERT(fp != NULL);
4730                 ASSERT(tvp != NULL);
4731 
4732                 /* DEBUG data */
4733                 if (RW_READ_HELD(&tvp->v_nbllock)) {
4734                         cmn_err(CE_WARN, "rfs4_op_rename: "
4735                             "RW_READ_HELD(%p)", (void *)tvp);
4736                 }
4737 
4738                 /* The file is gone and so should the state */
4739                 rfs4_close_all_state(fp);
4740                 VN_RELE(tvp);
4741         }
4742 
4743         if (sfp) {
4744                 rfs4_clear_dont_grant(sfp);
4745                 rfs4_file_rele(sfp);
4746         }
4747         if (fp) {
4748                 rfs4_clear_dont_grant(fp);
4749                 rfs4_file_rele(fp);
4750         }
4751 
4752         if (converted_onm != onm)
4753                 kmem_free(converted_onm, MAXPATHLEN + 1);
4754         kmem_free(onm, olen);
4755         if (converted_nnm != nnm)
4756                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4757         kmem_free(nnm, nlen);
4758 
4759         /*
4760          * Get the initial "after" sequence number, if it fails, set to zero
4761          */
4762         oidva.va_mask = AT_SEQ;
4763         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4764                 oidva.va_seq = 0;
4765 
4766         nidva.va_mask = AT_SEQ;
4767         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4768                 nidva.va_seq = 0;
4769 
4770         /*
4771          * Force modified data and metadata out to stable storage.
4772          */
4773         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4774         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4775 
4776         if (error) {
4777                 *cs->statusp = resp->status = puterrno4(error);
4778                 goto out;
4779         }
4780 
4781         /*
4782          * Get "after" change values, if it fails, simply return the
4783          * before value.
4784          */
4785         oadva.va_mask = AT_CTIME|AT_SEQ;
4786         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4787                 oadva.va_ctime = obdva.va_ctime;
4788                 oadva.va_seq = 0;
4789         }
4790 
4791         nadva.va_mask = AT_CTIME|AT_SEQ;
4792         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4793                 nadva.va_ctime = nbdva.va_ctime;
4794                 nadva.va_seq = 0;
4795         }
4796 
4797         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4798         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4799 
4800         /*
4801          * The cinfo.atomic = TRUE only if we have
4802          * non-zero va_seq's, and it has incremented by exactly one
4803          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4804          */
4805         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4806             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4807                 resp->source_cinfo.atomic = TRUE;
4808         else
4809                 resp->source_cinfo.atomic = FALSE;
4810 
4811         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4812             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4813                 resp->target_cinfo.atomic = TRUE;
4814         else
4815                 resp->target_cinfo.atomic = FALSE;
4816 
4817 #ifdef  VOLATILE_FH_TEST
4818         {
4819         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4820 
4821         /*
4822          * Add the renamed file handle to the volatile rename list
4823          */
4824         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4825                 /* file handles may expire on rename */
4826                 vnode_t *vp;
4827 
4828                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4829                 /*
4830                  * Already know that nnm will be a valid string
4831                  */
4832                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4833                     NULL, NULL, NULL);
4834                 kmem_free(nnm, nlen);
4835                 if (!error) {
4836                         add_volrnm_fh(cs->exi, vp);
4837                         VN_RELE(vp);
4838                 }
4839         }
4840         }
4841 #endif  /* VOLATILE_FH_TEST */
4842 
4843         *cs->statusp = resp->status = NFS4_OK;
4844 out:
4845         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4846             RENAME4res *, resp);
4847         return;
4848 
4849 err_out:
4850         if (onm != converted_onm)
4851                 kmem_free(converted_onm, MAXPATHLEN + 1);
4852         if (onm != NULL)
4853                 kmem_free(onm, olen);
4854         if (nnm != converted_nnm)
4855                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4856         if (nnm != NULL)
4857                 kmem_free(nnm, nlen);
4858 
4859         if (in_crit_src) nbl_end_crit(srcvp);
4860         if (in_crit_targ) nbl_end_crit(targvp);
4861         if (targvp) VN_RELE(targvp);
4862         if (srcvp) VN_RELE(srcvp);
4863         if (sfp) {
4864                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4865                 rfs4_file_rele(sfp);
4866         }
4867         if (fp) {
4868                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4869                 rfs4_file_rele(fp);
4870         }
4871 
4872         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4873             RENAME4res *, resp);
4874 }
4875 
4876 /* ARGSUSED */
4877 static void
4878 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4879     struct compound_state *cs)
4880 {
4881         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4882         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4883         rfs4_client_t *cp;
4884 
4885         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4886             RENEW4args *, args);
4887 
4888         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4889                 *cs->statusp = resp->status =
4890                     rfs4_check_clientid(&args->clientid, 0);
4891                 goto out;
4892         }
4893 
4894         if (rfs4_lease_expired(cp)) {
4895                 rfs4_client_rele(cp);
4896                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4897                 goto out;
4898         }
4899 
4900         rfs4_update_lease(cp);
4901 
4902         mutex_enter(cp->rc_cbinfo.cb_lock);
4903         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4904                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4905                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4906         } else {
4907                 *cs->statusp = resp->status = NFS4_OK;
4908         }
4909         mutex_exit(cp->rc_cbinfo.cb_lock);
4910 
4911         rfs4_client_rele(cp);
4912 
4913 out:
4914         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4915             RENEW4res *, resp);
4916 }
4917 
4918 /* ARGSUSED */
4919 static void
4920 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4921     struct compound_state *cs)
4922 {
4923         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4924 
4925         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4926 
4927         /* No need to check cs->access - we are not accessing any object */
4928         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4929                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4930                 goto out;
4931         }
4932         if (cs->vp != NULL) {
4933                 VN_RELE(cs->vp);
4934         }
4935         cs->vp = cs->saved_vp;
4936         cs->saved_vp = NULL;
4937         cs->exi = cs->saved_exi;
4938         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4939         *cs->statusp = resp->status = NFS4_OK;
4940         cs->deleg = FALSE;
4941 
4942 out:
4943         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4944             RESTOREFH4res *, resp);
4945 }
4946 
4947 /* ARGSUSED */
4948 static void
4949 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4950     struct compound_state *cs)
4951 {
4952         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4953 
4954         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4955 
4956         /* No need to check cs->access - we are not accessing any object */
4957         if (cs->vp == NULL) {
4958                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4959                 goto out;
4960         }
4961         if (cs->saved_vp != NULL) {
4962                 VN_RELE(cs->saved_vp);
4963         }
4964         cs->saved_vp = cs->vp;
4965         VN_HOLD(cs->saved_vp);
4966         cs->saved_exi = cs->exi;
4967         /*
4968          * since SAVEFH is fairly rare, don't alloc space for its fh
4969          * unless necessary.
4970          */
4971         if (cs->saved_fh.nfs_fh4_val == NULL) {
4972                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4973         }
4974         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4975         *cs->statusp = resp->status = NFS4_OK;
4976 
4977 out:
4978         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4979             SAVEFH4res *, resp);
4980 }
4981 
4982 /*
4983  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4984  * return the bitmap of attrs that were set successfully. It is also
4985  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4986  * always be called only after rfs4_do_set_attrs().
4987  *
4988  * Verify that the attributes are same as the expected ones. sargp->vap
4989  * and sargp->sbp contain the input attributes as translated from fattr4.
4990  *
4991  * This function verifies only the attrs that correspond to a vattr or
4992  * vfsstat struct. That is because of the extra step needed to get the
4993  * corresponding system structs. Other attributes have already been set or
4994  * verified by do_rfs4_set_attrs.
4995  *
4996  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4997  */
4998 static int
4999 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
5000     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
5001 {
5002         int error, ret_error = 0;
5003         int i, k;
5004         uint_t sva_mask = sargp->vap->va_mask;
5005         uint_t vbit;
5006         union nfs4_attr_u *na;
5007         uint8_t *amap;
5008         bool_t getsb = ntovp->vfsstat;
5009 
5010         if (sva_mask != 0) {
5011                 /*
5012                  * Okay to overwrite sargp->vap because we verify based
5013                  * on the incoming values.
5014                  */
5015                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
5016                     sargp->cs->cr, NULL);
5017                 if (ret_error) {
5018                         if (resp == NULL)
5019                                 return (ret_error);
5020                         /*
5021                          * Must return bitmap of successful attrs
5022                          */
5023                         sva_mask = 0;   /* to prevent checking vap later */
5024                 } else {
5025                         /*
5026                          * Some file systems clobber va_mask. it is probably
5027                          * wrong of them to do so, nonethless we practice
5028                          * defensive coding.
5029                          * See bug id 4276830.
5030                          */
5031                         sargp->vap->va_mask = sva_mask;
5032                 }
5033         }
5034 
5035         if (getsb) {
5036                 /*
5037                  * Now get the superblock and loop on the bitmap, as there is
5038                  * no simple way of translating from superblock to bitmap4.
5039                  */
5040                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5041                 if (ret_error) {
5042                         if (resp == NULL)
5043                                 goto errout;
5044                         getsb = FALSE;
5045                 }
5046         }
5047 
5048         /*
5049          * Now loop and verify each attribute which getattr returned
5050          * whether it's the same as the input.
5051          */
5052         if (resp == NULL && !getsb && (sva_mask == 0))
5053                 goto errout;
5054 
5055         na = ntovp->na;
5056         amap = ntovp->amap;
5057         k = 0;
5058         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5059                 k = *amap;
5060                 ASSERT(nfs4_ntov_map[k].nval == k);
5061                 vbit = nfs4_ntov_map[k].vbit;
5062 
5063                 /*
5064                  * If vattr attribute but VOP_GETATTR failed, or it's
5065                  * superblock attribute but VFS_STATVFS failed, skip
5066                  */
5067                 if (vbit) {
5068                         if ((vbit & sva_mask) == 0)
5069                                 continue;
5070                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5071                         continue;
5072                 }
5073                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5074                 if (resp != NULL) {
5075                         if (error)
5076                                 ret_error = -1; /* not all match */
5077                         else    /* update response bitmap */
5078                                 *resp |= nfs4_ntov_map[k].fbit;
5079                         continue;
5080                 }
5081                 if (error) {
5082                         ret_error = -1; /* not all match */
5083                         break;
5084                 }
5085         }
5086 errout:
5087         return (ret_error);
5088 }
5089 
5090 /*
5091  * Decode the attribute to be set/verified. If the attr requires a sys op
5092  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5093  * call the sv_getit function for it, because the sys op hasn't yet been done.
5094  * Return 0 for success, error code if failed.
5095  *
5096  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5097  */
5098 static int
5099 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5100     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5101 {
5102         int error = 0;
5103         bool_t set_later;
5104 
5105         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5106 
5107         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5108                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5109                 /*
5110                  * don't verify yet if a vattr or sb dependent attr,
5111                  * because we don't have their sys values yet.
5112                  * Will be done later.
5113                  */
5114                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5115                         /*
5116                          * ACLs are a special case, since setting the MODE
5117                          * conflicts with setting the ACL.  We delay setting
5118                          * the ACL until all other attributes have been set.
5119                          * The ACL gets set in do_rfs4_op_setattr().
5120                          */
5121                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5122                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5123                                     sargp, nap);
5124                                 if (error) {
5125                                         xdr_free(nfs4_ntov_map[k].xfunc,
5126                                             (caddr_t)nap);
5127                                 }
5128                         }
5129                 }
5130         } else {
5131 #ifdef  DEBUG
5132                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5133                     "decoding attribute %d\n", k);
5134 #endif
5135                 error = EINVAL;
5136         }
5137         if (!error && resp_bval && !set_later) {
5138                 *resp_bval |= nfs4_ntov_map[k].fbit;
5139         }
5140 
5141         return (error);
5142 }
5143 
5144 /*
5145  * Set vattr based on incoming fattr4 attrs - used by setattr.
5146  * Set response mask. Ignore any values that are not writable vattr attrs.
5147  */
5148 static nfsstat4
5149 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5150     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5151     nfs4_attr_cmd_t cmd)
5152 {
5153         int error = 0;
5154         int i;
5155         char *attrs = fattrp->attrlist4;
5156         uint32_t attrslen = fattrp->attrlist4_len;
5157         XDR xdr;
5158         nfsstat4 status = NFS4_OK;
5159         vnode_t *vp = cs->vp;
5160         union nfs4_attr_u *na;
5161         uint8_t *amap;
5162 
5163 #ifndef lint
5164         /*
5165          * Make sure that maximum attribute number can be expressed as an
5166          * 8 bit quantity.
5167          */
5168         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5169 #endif
5170 
5171         if (vp == NULL) {
5172                 if (resp)
5173                         *resp = 0;
5174                 return (NFS4ERR_NOFILEHANDLE);
5175         }
5176         if (cs->access == CS_ACCESS_DENIED) {
5177                 if (resp)
5178                         *resp = 0;
5179                 return (NFS4ERR_ACCESS);
5180         }
5181 
5182         sargp->op = cmd;
5183         sargp->cs = cs;
5184         sargp->flag = 0;     /* may be set later */
5185         sargp->vap->va_mask = 0;
5186         sargp->rdattr_error = NFS4_OK;
5187         sargp->rdattr_error_req = FALSE;
5188         /* sargp->sbp is set by the caller */
5189 
5190         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5191 
5192         na = ntovp->na;
5193         amap = ntovp->amap;
5194 
5195         /*
5196          * The following loop iterates on the nfs4_ntov_map checking
5197          * if the fbit is set in the requested bitmap.
5198          * If set then we process the arguments using the
5199          * rfs4_fattr4 conversion functions to populate the setattr
5200          * vattr and va_mask. Any settable attrs that are not using vattr
5201          * will be set in this loop.
5202          */
5203         for (i = 0; i < nfs4_ntov_map_size; i++) {
5204                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5205                         continue;
5206                 }
5207                 /*
5208                  * If setattr, must be a writable attr.
5209                  * If verify/nverify, must be a readable attr.
5210                  */
5211                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5212                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5213                         /*
5214                          * Client tries to set/verify an
5215                          * unsupported attribute, tries to set
5216                          * a read only attr or verify a write
5217                          * only one - error!
5218                          */
5219                         break;
5220                 }
5221                 /*
5222                  * Decode the attribute to set/verify
5223                  */
5224                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5225                     &xdr, resp ? resp : NULL, na);
5226                 if (error)
5227                         break;
5228                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5229                 na++;
5230                 (ntovp->attrcnt)++;
5231                 if (nfs4_ntov_map[i].vfsstat)
5232                         ntovp->vfsstat = TRUE;
5233         }
5234 
5235         if (error != 0)
5236                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5237                     puterrno4(error));
5238         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5239         return (status);
5240 }
5241 
5242 static nfsstat4
5243 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5244     stateid4 *stateid)
5245 {
5246         int error = 0;
5247         struct nfs4_svgetit_arg sarg;
5248         bool_t trunc;
5249 
5250         nfsstat4 status = NFS4_OK;
5251         cred_t *cr = cs->cr;
5252         vnode_t *vp = cs->vp;
5253         struct nfs4_ntov_table ntov;
5254         struct statvfs64 sb;
5255         struct vattr bva;
5256         struct flock64 bf;
5257         int in_crit = 0;
5258         uint_t saved_mask = 0;
5259         caller_context_t ct;
5260 
5261         *resp = 0;
5262         sarg.sbp = &sb;
5263         sarg.is_referral = B_FALSE;
5264         nfs4_ntov_table_init(&ntov);
5265         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5266             NFS4ATTR_SETIT);
5267         if (status != NFS4_OK) {
5268                 /*
5269                  * failed set attrs
5270                  */
5271                 goto done;
5272         }
5273         if ((sarg.vap->va_mask == 0) &&
5274             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5275                 /*
5276                  * no further work to be done
5277                  */
5278                 goto done;
5279         }
5280 
5281         /*
5282          * If we got a request to set the ACL and the MODE, only
5283          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5284          * to change any other bits, along with setting an ACL,
5285          * gives NFS4ERR_INVAL.
5286          */
5287         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5288             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5289                 vattr_t va;
5290 
5291                 va.va_mask = AT_MODE;
5292                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5293                 if (error) {
5294                         status = puterrno4(error);
5295                         goto done;
5296                 }
5297                 if ((sarg.vap->va_mode ^ va.va_mode) &
5298                     ~(VSUID | VSGID | VSVTX)) {
5299                         status = NFS4ERR_INVAL;
5300                         goto done;
5301                 }
5302         }
5303 
5304         /* Check stateid only if size has been set */
5305         if (sarg.vap->va_mask & AT_SIZE) {
5306                 trunc = (sarg.vap->va_size == 0);
5307                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5308                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5309                 if (status != NFS4_OK)
5310                         goto done;
5311         } else {
5312                 ct.cc_sysid = 0;
5313                 ct.cc_pid = 0;
5314                 ct.cc_caller_id = nfs4_srv_caller_id;
5315                 ct.cc_flags = CC_DONTBLOCK;
5316         }
5317 
5318         /* XXX start of possible race with delegations */
5319 
5320         /*
5321          * We need to specially handle size changes because it is
5322          * possible for the client to create a file with read-only
5323          * modes, but with the file opened for writing. If the client
5324          * then tries to set the file size, e.g. ftruncate(3C),
5325          * fcntl(F_FREESP), the normal access checking done in
5326          * VOP_SETATTR would prevent the client from doing it even though
5327          * it should be allowed to do so.  To get around this, we do the
5328          * access checking for ourselves and use VOP_SPACE which doesn't
5329          * do the access checking.
5330          * Also the client should not be allowed to change the file
5331          * size if there is a conflicting non-blocking mandatory lock in
5332          * the region of the change.
5333          */
5334         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5335                 u_offset_t offset;
5336                 ssize_t length;
5337 
5338                 /*
5339                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5340                  * before returning, sarg.vap->va_mask is used to
5341                  * generate the setattr reply bitmap.  We also clear
5342                  * AT_SIZE below before calling VOP_SPACE.  For both
5343                  * of these cases, the va_mask needs to be saved here
5344                  * and restored after calling VOP_SETATTR.
5345                  */
5346                 saved_mask = sarg.vap->va_mask;
5347 
5348                 /*
5349                  * Check any possible conflict due to NBMAND locks.
5350                  * Get into critical region before VOP_GETATTR, so the
5351                  * size attribute is valid when checking conflicts.
5352                  */
5353                 if (nbl_need_check(vp)) {
5354                         nbl_start_crit(vp, RW_READER);
5355                         in_crit = 1;
5356                 }
5357 
5358                 bva.va_mask = AT_UID|AT_SIZE;
5359                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5360                         status = puterrno4(error);
5361                         goto done;
5362                 }
5363 
5364                 if (in_crit) {
5365                         if (sarg.vap->va_size < bva.va_size) {
5366                                 offset = sarg.vap->va_size;
5367                                 length = bva.va_size - sarg.vap->va_size;
5368                         } else {
5369                                 offset = bva.va_size;
5370                                 length = sarg.vap->va_size - bva.va_size;
5371                         }
5372                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5373                             &ct)) {
5374                                 status = NFS4ERR_LOCKED;
5375                                 goto done;
5376                         }
5377                 }
5378 
5379                 if (crgetuid(cr) == bva.va_uid) {
5380                         sarg.vap->va_mask &= ~AT_SIZE;
5381                         bf.l_type = F_WRLCK;
5382                         bf.l_whence = 0;
5383                         bf.l_start = (off64_t)sarg.vap->va_size;
5384                         bf.l_len = 0;
5385                         bf.l_sysid = 0;
5386                         bf.l_pid = 0;
5387                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5388                             (offset_t)sarg.vap->va_size, cr, &ct);
5389                 }
5390         }
5391 
5392         if (!error && sarg.vap->va_mask != 0)
5393                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5394 
5395         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5396         if (saved_mask & AT_SIZE)
5397                 sarg.vap->va_mask |= AT_SIZE;
5398 
5399         /*
5400          * If an ACL was being set, it has been delayed until now,
5401          * in order to set the mode (via the VOP_SETATTR() above) first.
5402          */
5403         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5404                 int i;
5405 
5406                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5407                         if (ntov.amap[i] == FATTR4_ACL)
5408                                 break;
5409                 if (i < NFS4_MAXNUM_ATTRS) {
5410                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5411                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5412                         if (error == 0) {
5413                                 *resp |= FATTR4_ACL_MASK;
5414                         } else if (error == ENOTSUP) {
5415                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5416                                 status = NFS4ERR_ATTRNOTSUPP;
5417                                 goto done;
5418                         }
5419                 } else {
5420                         NFS4_DEBUG(rfs4_debug,
5421                             (CE_NOTE, "do_rfs4_op_setattr: "
5422                             "unable to find ACL in fattr4"));
5423                         error = EINVAL;
5424                 }
5425         }
5426 
5427         if (error) {
5428                 /* check if a monitor detected a delegation conflict */
5429                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5430                         status = NFS4ERR_DELAY;
5431                 else
5432                         status = puterrno4(error);
5433 
5434                 /*
5435                  * Set the response bitmap when setattr failed.
5436                  * If VOP_SETATTR partially succeeded, test by doing a
5437                  * VOP_GETATTR on the object and comparing the data
5438                  * to the setattr arguments.
5439                  */
5440                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5441         } else {
5442                 /*
5443                  * Force modified metadata out to stable storage.
5444                  */
5445                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5446                 /*
5447                  * Set response bitmap
5448                  */
5449                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5450         }
5451 
5452 /* Return early and already have a NFSv4 error */
5453 done:
5454         /*
5455          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5456          * conversion sets both readable and writeable NFS4 attrs
5457          * for AT_MTIME and AT_ATIME.  The line below masks out
5458          * unrequested attrs from the setattr result bitmap.  This
5459          * is placed after the done: label to catch the ATTRNOTSUP
5460          * case.
5461          */
5462         *resp &= fattrp->attrmask;
5463 
5464         if (in_crit)
5465                 nbl_end_crit(vp);
5466 
5467         nfs4_ntov_table_free(&ntov, &sarg);
5468 
5469         return (status);
5470 }
5471 
5472 /* ARGSUSED */
5473 static void
5474 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5475     struct compound_state *cs)
5476 {
5477         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5478         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5479         bslabel_t *clabel;
5480 
5481         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5482             SETATTR4args *, args);
5483 
5484         if (cs->vp == NULL) {
5485                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5486                 goto out;
5487         }
5488 
5489         /*
5490          * If there is an unshared filesystem mounted on this vnode,
5491          * do not allow to setattr on this vnode.
5492          */
5493         if (vn_ismntpt(cs->vp)) {
5494                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5495                 goto out;
5496         }
5497 
5498         resp->attrsset = 0;
5499 
5500         if (rdonly4(req, cs)) {
5501                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5502                 goto out;
5503         }
5504 
5505         /* check label before setting attributes */
5506         if (is_system_labeled()) {
5507                 ASSERT(req->rq_label != NULL);
5508                 clabel = req->rq_label;
5509                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5510                     "got client label from request(1)",
5511                     struct svc_req *, req);
5512                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5513                         if (!do_rfs_label_check(clabel, cs->vp,
5514                             EQUALITY_CHECK, cs->exi)) {
5515                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5516                                 goto out;
5517                         }
5518                 }
5519         }
5520 
5521         *cs->statusp = resp->status =
5522             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5523             &args->stateid);
5524 
5525 out:
5526         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5527             SETATTR4res *, resp);
5528 }
5529 
5530 /* ARGSUSED */
5531 static void
5532 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5533     struct compound_state *cs)
5534 {
5535         /*
5536          * verify and nverify are exactly the same, except that nverify
5537          * succeeds when some argument changed, and verify succeeds when
5538          * when none changed.
5539          */
5540 
5541         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5542         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5543 
5544         int error;
5545         struct nfs4_svgetit_arg sarg;
5546         struct statvfs64 sb;
5547         struct nfs4_ntov_table ntov;
5548 
5549         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5550             VERIFY4args *, args);
5551 
5552         if (cs->vp == NULL) {
5553                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5554                 goto out;
5555         }
5556 
5557         sarg.sbp = &sb;
5558         sarg.is_referral = B_FALSE;
5559         nfs4_ntov_table_init(&ntov);
5560         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5561             &sarg, &ntov, NFS4ATTR_VERIT);
5562         if (resp->status != NFS4_OK) {
5563                 /*
5564                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5565                  * so could return -1 for "no match".
5566                  */
5567                 if (resp->status == -1)
5568                         resp->status = NFS4ERR_NOT_SAME;
5569                 goto done;
5570         }
5571         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5572         switch (error) {
5573         case 0:
5574                 resp->status = NFS4_OK;
5575                 break;
5576         case -1:
5577                 resp->status = NFS4ERR_NOT_SAME;
5578                 break;
5579         default:
5580                 resp->status = puterrno4(error);
5581                 break;
5582         }
5583 done:
5584         *cs->statusp = resp->status;
5585         nfs4_ntov_table_free(&ntov, &sarg);
5586 out:
5587         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5588             VERIFY4res *, resp);
5589 }
5590 
5591 /* ARGSUSED */
5592 static void
5593 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5594     struct compound_state *cs)
5595 {
5596         /*
5597          * verify and nverify are exactly the same, except that nverify
5598          * succeeds when some argument changed, and verify succeeds when
5599          * when none changed.
5600          */
5601 
5602         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5603         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5604 
5605         int error;
5606         struct nfs4_svgetit_arg sarg;
5607         struct statvfs64 sb;
5608         struct nfs4_ntov_table ntov;
5609 
5610         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5611             NVERIFY4args *, args);
5612 
5613         if (cs->vp == NULL) {
5614                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5615                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5616                     NVERIFY4res *, resp);
5617                 return;
5618         }
5619         sarg.sbp = &sb;
5620         sarg.is_referral = B_FALSE;
5621         nfs4_ntov_table_init(&ntov);
5622         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5623             &sarg, &ntov, NFS4ATTR_VERIT);
5624         if (resp->status != NFS4_OK) {
5625                 /*
5626                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5627                  * so could return -1 for "no match".
5628                  */
5629                 if (resp->status == -1)
5630                         resp->status = NFS4_OK;
5631                 goto done;
5632         }
5633         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5634         switch (error) {
5635         case 0:
5636                 resp->status = NFS4ERR_SAME;
5637                 break;
5638         case -1:
5639                 resp->status = NFS4_OK;
5640                 break;
5641         default:
5642                 resp->status = puterrno4(error);
5643                 break;
5644         }
5645 done:
5646         *cs->statusp = resp->status;
5647         nfs4_ntov_table_free(&ntov, &sarg);
5648 
5649         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5650             NVERIFY4res *, resp);
5651 }
5652 
/*
 * Size of the on-stack iovec array declared in rfs4_op_write().
 *
 * XXX - This should live in an NFS header file.
 */
#define MAX_IOVECS      12
5657 
5658 /* ARGSUSED */
5659 static void
5660 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5661     struct compound_state *cs)
5662 {
5663         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5664         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5665         int error;
5666         vnode_t *vp;
5667         struct vattr bva;
5668         u_offset_t rlimit;
5669         struct uio uio;
5670         struct iovec iov[MAX_IOVECS];
5671         struct iovec *iovp;
5672         int iovcnt;
5673         int ioflag;
5674         cred_t *savecred, *cr;
5675         bool_t *deleg = &cs->deleg;
5676         nfsstat4 stat;
5677         int in_crit = 0;
5678         caller_context_t ct;
5679         nfs4_srv_t *nsrv4;
5680 
5681         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5682             WRITE4args *, args);
5683 
5684         vp = cs->vp;
5685         if (vp == NULL) {
5686                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5687                 goto out;
5688         }
5689         if (cs->access == CS_ACCESS_DENIED) {
5690                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5691                 goto out;
5692         }
5693 
5694         cr = cs->cr;
5695 
5696         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5697             deleg, TRUE, &ct)) != NFS4_OK) {
5698                 *cs->statusp = resp->status = stat;
5699                 goto out;
5700         }
5701 
5702         /*
5703          * We have to enter the critical region before calling VOP_RWLOCK
5704          * to avoid a deadlock with ufs.
5705          */
5706         if (nbl_need_check(vp)) {
5707                 nbl_start_crit(vp, RW_READER);
5708                 in_crit = 1;
5709                 if (nbl_conflict(vp, NBL_WRITE,
5710                     args->offset, args->data_len, 0, &ct)) {
5711                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5712                         goto out;
5713                 }
5714         }
5715 
5716         bva.va_mask = AT_MODE | AT_UID;
5717         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5718 
5719         /*
5720          * If we can't get the attributes, then we can't do the
5721          * right access checking.  So, we'll fail the request.
5722          */
5723         if (error) {
5724                 *cs->statusp = resp->status = puterrno4(error);
5725                 goto out;
5726         }
5727 
5728         if (rdonly4(req, cs)) {
5729                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5730                 goto out;
5731         }
5732 
5733         if (vp->v_type != VREG) {
5734                 *cs->statusp = resp->status =
5735                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5736                 goto out;
5737         }
5738 
5739         if (crgetuid(cr) != bva.va_uid &&
5740             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5741                 *cs->statusp = resp->status = puterrno4(error);
5742                 goto out;
5743         }
5744 
5745         if (MANDLOCK(vp, bva.va_mode)) {
5746                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5747                 goto out;
5748         }
5749 
5750         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5751         if (args->data_len == 0) {
5752                 *cs->statusp = resp->status = NFS4_OK;
5753                 resp->count = 0;
5754                 resp->committed = args->stable;
5755                 resp->writeverf = nsrv4->write4verf;
5756                 goto out;
5757         }
5758 
5759         if (args->mblk != NULL) {
5760                 mblk_t *m;
5761                 uint_t bytes, round_len;
5762 
5763                 iovcnt = 0;
5764                 bytes = 0;
5765                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5766                 for (m = args->mblk;
5767                     m != NULL && bytes < round_len;
5768                     m = m->b_cont) {
5769                         iovcnt++;
5770                         bytes += MBLKL(m);
5771                 }
5772 #ifdef DEBUG
5773                 /* should have ended on an mblk boundary */
5774                 if (bytes != round_len) {
5775                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5776                             bytes, round_len, args->data_len);
5777                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5778                             (void *)args->mblk, (void *)m);
5779                         ASSERT(bytes == round_len);
5780                 }
5781 #endif
5782                 if (iovcnt <= MAX_IOVECS) {
5783                         iovp = iov;
5784                 } else {
5785                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5786                 }
5787                 mblk_to_iov(args->mblk, iovcnt, iovp);
5788         } else if (args->rlist != NULL) {
5789                 iovcnt = 1;
5790                 iovp = iov;
5791                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5792                 iovp->iov_len = args->data_len;
5793         } else {
5794                 iovcnt = 1;
5795                 iovp = iov;
5796                 iovp->iov_base = args->data_val;
5797                 iovp->iov_len = args->data_len;
5798         }
5799 
5800         uio.uio_iov = iovp;
5801         uio.uio_iovcnt = iovcnt;
5802 
5803         uio.uio_segflg = UIO_SYSSPACE;
5804         uio.uio_extflg = UIO_COPY_DEFAULT;
5805         uio.uio_loffset = args->offset;
5806         uio.uio_resid = args->data_len;
5807         uio.uio_llimit = curproc->p_fsz_ctl;
5808         rlimit = uio.uio_llimit - args->offset;
5809         if (rlimit < (u_offset_t)uio.uio_resid)
5810                 uio.uio_resid = (int)rlimit;
5811 
5812         if (args->stable == UNSTABLE4)
5813                 ioflag = 0;
5814         else if (args->stable == FILE_SYNC4)
5815                 ioflag = FSYNC;
5816         else if (args->stable == DATA_SYNC4)
5817                 ioflag = FDSYNC;
5818         else {
5819                 if (iovp != iov)
5820                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5821                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5822                 goto out;
5823         }
5824 
5825         /*
5826          * We're changing creds because VM may fault and we need
5827          * the cred of the current thread to be used if quota
5828          * checking is enabled.
5829          */
5830         savecred = curthread->t_cred;
5831         curthread->t_cred = cr;
5832         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5833         curthread->t_cred = savecred;
5834 
5835         if (iovp != iov)
5836                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5837 
5838         if (error) {
5839                 *cs->statusp = resp->status = puterrno4(error);
5840                 goto out;
5841         }
5842 
5843         *cs->statusp = resp->status = NFS4_OK;
5844         resp->count = args->data_len - uio.uio_resid;
5845 
5846         if (ioflag == 0)
5847                 resp->committed = UNSTABLE4;
5848         else
5849                 resp->committed = FILE_SYNC4;
5850 
5851         resp->writeverf = nsrv4->write4verf;
5852 
5853 out:
5854         if (in_crit)
5855                 nbl_end_crit(vp);
5856 
5857         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5858             WRITE4res *, resp);
5859 }
5860 
5861 
5862 /* XXX put in a header file */
5863 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5864 
5865 void
5866 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5867     struct svc_req *req, cred_t *cr, int *rv)
5868 {
5869         uint_t i;
5870         struct compound_state cs;
5871         nfs4_srv_t *nsrv4;
5872         nfs_export_t *ne = nfs_get_export();
5873 
5874         if (rv != NULL)
5875                 *rv = 0;
5876         rfs4_init_compound_state(&cs);
5877         /*
5878          * Form a reply tag by copying over the reqeuest tag.
5879          */
5880         resp->tag.utf8string_val =
5881             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5882         resp->tag.utf8string_len = args->tag.utf8string_len;
5883         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5884             resp->tag.utf8string_len);
5885 
5886         cs.statusp = &resp->status;
5887         cs.req = req;
5888         resp->array = NULL;
5889         resp->array_len = 0;
5890 
5891         /*
5892          * XXX for now, minorversion should be zero
5893          */
5894         if (args->minorversion != NFS4_MINORVERSION) {
5895                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5896                     &cs, COMPOUND4args *, args);
5897                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5898                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5899                     &cs, COMPOUND4res *, resp);
5900                 return;
5901         }
5902 
5903         if (args->array_len == 0) {
5904                 resp->status = NFS4_OK;
5905                 return;
5906         }
5907 
5908         ASSERT(exi == NULL);
5909         ASSERT(cr == NULL);
5910 
5911         cr = crget();
5912         ASSERT(cr != NULL);
5913 
5914         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5915                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5916                     &cs, COMPOUND4args *, args);
5917                 crfree(cr);
5918                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5919                     &cs, COMPOUND4res *, resp);
5920                 svcerr_badcred(req->rq_xprt);
5921                 if (rv != NULL)
5922                         *rv = 1;
5923                 return;
5924         }
5925         resp->array_len = args->array_len;
5926         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5927             KM_SLEEP);
5928 
5929         cs.basecr = cr;
5930         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5931 
5932         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5933             COMPOUND4args *, args);
5934 
5935         /*
5936          * For now, NFS4 compound processing must be protected by
5937          * exported_lock because it can access more than one exportinfo
5938          * per compound and share/unshare can now change multiple
5939          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5940          * per proc (excluding public exinfo), and exi_count design
5941          * is sufficient to protect concurrent execution of NFS2/3
5942          * ops along with unexport.  This lock will be removed as
5943          * part of the NFSv4 phase 2 namespace redesign work.
5944          */
5945         rw_enter(&ne->exported_lock, RW_READER);
5946 
5947         /*
5948          * If this is the first compound we've seen, we need to start all
5949          * new instances' grace periods.
5950          */
5951         if (nsrv4->seen_first_compound == 0) {
5952                 rfs4_grace_start_new(nsrv4);
5953                 /*
5954                  * This must be set after rfs4_grace_start_new(), otherwise
5955                  * another thread could proceed past here before the former
5956                  * is finished.
5957                  */
5958                 nsrv4->seen_first_compound = 1;
5959         }
5960 
5961         for (i = 0; i < args->array_len && cs.cont; i++) {
5962                 nfs_argop4 *argop;
5963                 nfs_resop4 *resop;
5964                 uint_t op;
5965 
5966                 argop = &args->array[i];
5967                 resop = &resp->array[i];
5968                 resop->resop = argop->argop;
5969                 op = (uint_t)resop->resop;
5970 
5971                 if (op < rfsv4disp_cnt) {
5972                         kstat_t *ksp = rfsprocio_v4_ptr[op];
5973                         kstat_t *exi_ksp = NULL;
5974 
5975                         /*
5976                          * Count the individual ops here; NULL and COMPOUND
5977                          * are counted in common_dispatch()
5978                          */
5979                         rfsproccnt_v4_ptr[op].value.ui64++;
5980 
5981                         if (ksp != NULL) {
5982                                 mutex_enter(ksp->ks_lock);
5983                                 kstat_runq_enter(KSTAT_IO_PTR(ksp));
5984                                 mutex_exit(ksp->ks_lock);
5985                         }
5986 
5987                         switch (rfsv4disptab[op].op_type) {
5988                         case NFS4_OP_CFH:
5989                                 resop->exi = cs.exi;
5990                                 break;
5991                         case NFS4_OP_SFH:
5992                                 resop->exi = cs.saved_exi;
5993                                 break;
5994                         default:
5995                                 ASSERT(resop->exi == NULL);
5996                                 break;
5997                         }
5998 
5999                         if (resop->exi != NULL) {
6000                                 exi_ksp = NULL;
6001                                 if (resop->exi->exi_kstats != NULL) {
6002                                         exi_ksp = exp_kstats_v4(
6003                                             resop->exi->exi_kstats, op);
6004                                 }
6005                                 if (exi_ksp != NULL) {
6006                                         mutex_enter(exi_ksp->ks_lock);
6007                                         kstat_runq_enter(KSTAT_IO_PTR(exi_ksp));
6008                                         mutex_exit(exi_ksp->ks_lock);
6009                                 }
6010                         }
6011 
6012                         NFS4_DEBUG(rfs4_debug > 1,
6013                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
6014                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
6015                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
6016                             rfs4_op_string[op], *cs.statusp));
6017                         if (*cs.statusp != NFS4_OK)
6018                                 cs.cont = FALSE;
6019 
6020                         if (rfsv4disptab[op].op_type == NFS4_OP_POSTCFH &&
6021                             *cs.statusp == NFS4_OK &&
6022                             (resop->exi = cs.exi) != NULL) {
6023                                 exi_ksp = NULL;
6024                                 if (resop->exi->exi_kstats != NULL) {
6025                                         exi_ksp = exp_kstats_v4(
6026                                             resop->exi->exi_kstats, op);
6027                                 }
6028                         }
6029 
6030                         if (exi_ksp != NULL) {
6031                                 mutex_enter(exi_ksp->ks_lock);
6032                                 KSTAT_IO_PTR(exi_ksp)->nwritten +=
6033                                     argop->opsize;
6034                                 KSTAT_IO_PTR(exi_ksp)->writes++;
6035                                 if (rfsv4disptab[op].op_type != NFS4_OP_POSTCFH)
6036                                         kstat_runq_exit(KSTAT_IO_PTR(exi_ksp));
6037                                 mutex_exit(exi_ksp->ks_lock);
6038                         } else {
6039                                 resop->exi = NULL;
6040                         }
6041 
6042                         if (ksp != NULL) {
6043                                 mutex_enter(ksp->ks_lock);
6044                                 kstat_runq_exit(KSTAT_IO_PTR(ksp));
6045                                 mutex_exit(ksp->ks_lock);
6046                         }
6047                 } else {
6048                         /*
6049                          * This is effectively dead code since XDR code
6050                          * will have already returned BADXDR if op doesn't
6051                          * decode to legal value.  This only done for a
6052                          * day when XDR code doesn't verify v4 opcodes.
6053                          */
6054                         op = OP_ILLEGAL;
6055                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
6056 
6057                         rfs4_op_illegal(argop, resop, req, &cs);
6058                         cs.cont = FALSE;
6059                 }
6060 
6061                 /*
6062                  * The exi saved in the resop to be used for kstats update
6063                  * once the opsize is calculated during XDR response encoding.
6064                  * Put a hold on resop->exi so that it can't be destroyed.
6065                  */
6066                 if (resop->exi != NULL)
6067                         exi_hold(resop->exi);
6068 
6069                 /*
6070                  * If not at last op, and if we are to stop, then
6071                  * compact the results array.
6072                  */
6073                 if ((i + 1) < args->array_len && !cs.cont) {
6074                         nfs_resop4 *new_res = kmem_alloc(
6075                             (i + 1) * sizeof (nfs_resop4), KM_SLEEP);
6076                         bcopy(resp->array,
6077                             new_res, (i + 1) * sizeof (nfs_resop4));
6078                         kmem_free(resp->array,
6079                             args->array_len * sizeof (nfs_resop4));
6080 
6081                         resp->array_len = i + 1;
6082                         resp->array = new_res;
6083                 }
6084         }
6085 
6086         rw_exit(&ne->exported_lock);
6087 
6088         /*
6089          * clear exportinfo and vnode fields from compound_state before dtrace
6090          * probe, to avoid tracing residual values for path and share path.
6091          */
6092         if (cs.vp)
6093                 VN_RELE(cs.vp);
6094         if (cs.saved_vp)
6095                 VN_RELE(cs.saved_vp);
6096         cs.exi = cs.saved_exi = NULL;
6097         cs.vp = cs.saved_vp = NULL;
6098 
6099         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
6100             COMPOUND4res *, resp);
6101 
6102         if (cs.saved_fh.nfs_fh4_val)
6103                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
6104 
6105         if (cs.basecr)
6106                 crfree(cs.basecr);
6107         if (cs.cr)
6108                 crfree(cs.cr);
6109         /*
6110          * done with this compound request, free the label
6111          */
6112 
6113         if (req->rq_label != NULL) {
6114                 kmem_free(req->rq_label, sizeof (bslabel_t));
6115                 req->rq_label = NULL;
6116         }
6117 }
6118 
6119 /*
6120  * XXX because of what appears to be duplicate calls to rfs4_compound_free
6121  * XXX zero out the tag and array values. Need to investigate why the
6122  * XXX calls occur, but at least prevent the panic for now.
6123  */
6124 void
6125 rfs4_compound_free(COMPOUND4res *resp)
6126 {
6127         uint_t i;
6128 
6129         if (resp->tag.utf8string_val) {
6130                 UTF8STRING_FREE(resp->tag)
6131         }
6132 
6133         for (i = 0; i < resp->array_len; i++) {
6134                 nfs_resop4 *resop;
6135                 uint_t op;
6136 
6137                 resop = &resp->array[i];
6138                 op = (uint_t)resop->resop;
6139                 if (op < rfsv4disp_cnt) {
6140                         (*rfsv4disptab[op].dis_resfree)(resop);
6141                 }
6142         }
6143         if (resp->array != NULL) {
6144                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6145         }
6146 }
6147 
6148 /*
6149  * Process the value of the compound request rpc flags, as a bit-AND
6150  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6151  */
6152 void
6153 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6154 {
6155         int i;
6156         int flag = RPC_ALL;
6157 
6158         for (i = 0; flag && i < args->array_len; i++) {
6159                 uint_t op;
6160 
6161                 op = (uint_t)args->array[i].argop;
6162 
6163                 if (op < rfsv4disp_cnt)
6164                         flag &= rfsv4disptab[op].dis_flags;
6165                 else
6166                         flag = 0;
6167         }
6168         *flagp = flag;
6169 }
6170 
6171 /*
6172  * Update the kstats for the received requests.
6173  * Note: writes/nwritten are used to hold count and nbytes of requests received.
6174  *
6175  * Per export request statistics need to be updated during the compound request
6176  * processing (rfs4_compound()) as that is where it is known which exportinfo to
6177  * associate the kstats with.
6178  */
6179 void
6180 rfs4_compound_kstat_args(COMPOUND4args *args)
6181 {
6182         int i;
6183 
6184         for (i = 0; i < args->array_len; i++) {
6185                 uint_t op = (uint_t)args->array[i].argop;
6186 
6187                 if (op < rfsv4disp_cnt) {
6188                         kstat_t *ksp = rfsprocio_v4_ptr[op];
6189 
6190                         if (ksp != NULL) {
6191                                 mutex_enter(ksp->ks_lock);
6192                                 KSTAT_IO_PTR(ksp)->nwritten +=
6193                                     args->array[i].opsize;
6194                                 KSTAT_IO_PTR(ksp)->writes++;
6195                                 mutex_exit(ksp->ks_lock);
6196                         }
6197                 }
6198         }
6199 }
6200 
6201 /*
6202  * Update the kstats for the sent responses.
6203  * Note: reads/nread are used to hold count and nbytes of responses sent.
6204  *
6205  * Per export response statistics cannot be updated until here, after the
6206  * response send has generated the opsize (bytes sent) in the XDR encoding.
6207  * The exportinfo with which the kstats should be associated is thus saved
6208  * in the response structure (by rfs4_compound()) for use here. A hold is
6209  * placed on the exi to ensure it cannot be deleted before use. This hold
6210  * is released, and the exi set to NULL, here.
6211  */
6212 void
6213 rfs4_compound_kstat_res(COMPOUND4res *res)
6214 {
6215         int i;
6216         nfs_export_t *ne = nfs_get_export();
6217 
6218         for (i = 0; i < res->array_len; i++) {
6219                 uint_t op = (uint_t)res->array[i].resop;
6220 
6221                 if (op < rfsv4disp_cnt) {
6222                         kstat_t *ksp = rfsprocio_v4_ptr[op];
6223                         struct exportinfo *exi = res->array[i].exi;
6224 
6225                         if (ksp != NULL) {
6226                                 mutex_enter(ksp->ks_lock);
6227                                 KSTAT_IO_PTR(ksp)->nread +=
6228                                     res->array[i].opsize;
6229                                 KSTAT_IO_PTR(ksp)->reads++;
6230                                 mutex_exit(ksp->ks_lock);
6231                         }
6232 
6233                         if (exi != NULL) {
6234                                 kstat_t *exi_ksp = NULL;
6235 
6236                                 rw_enter(&ne->exported_lock, RW_READER);
6237 
6238                                 if (exi->exi_kstats != NULL) {
6239                                         /*CSTYLED*/
6240                                         exi_ksp = exp_kstats_v4(exi->exi_kstats, op);
6241                                 }
6242                                 if (exi_ksp != NULL) {
6243                                         mutex_enter(exi_ksp->ks_lock);
6244                                         KSTAT_IO_PTR(exi_ksp)->nread +=
6245                                             res->array[i].opsize;
6246                                         KSTAT_IO_PTR(exi_ksp)->reads++;
6247                                         mutex_exit(exi_ksp->ks_lock);
6248                                 }
6249 
6250                                 exi_rele(&exi);
6251                                 res->array[i].exi = NULL;
6252                                 rw_exit(&ne->exported_lock);
6253                         }
6254                 }
6255         }
6256 }
6257 
6258 nfsstat4
6259 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6260 {
6261         nfsstat4 e;
6262 
6263         rfs4_dbe_lock(cp->rc_dbe);
6264 
6265         if (cp->rc_sysidt != LM_NOSYSID) {
6266                 *sp = cp->rc_sysidt;
6267                 e = NFS4_OK;
6268 
6269         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6270                 *sp = cp->rc_sysidt;
6271                 e = NFS4_OK;
6272 
6273                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6274                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6275         } else
6276                 e = NFS4ERR_DELAY;
6277 
6278         rfs4_dbe_unlock(cp->rc_dbe);
6279         return (e);
6280 }
6281 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: log one record lock request through cmn_err().
 *
 * str       - prefix identifying the call site
 * operation - fcntl command (F_GETLK, F_SETLK or F_SETLK_NBMAND; anything
 *             else is reported as "F_UNKNOWN")
 * flk       - lock description; l_whence is asserted to be 0, and a zero
 *             l_len is printed as ~0LL
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        const char *op, *type;

        switch (operation) {
        case F_GETLK:
                op = "F_GETLK";
                break;
        case F_SETLK:
                op = "F_SETLK";
                break;
        case F_SETLK_NBMAND:
                op = "F_SETLK_NBMAND";
                break;
        default:
                op = "F_UNKNOWN";
                break;
        }

        switch (flk->l_type) {
        case F_UNLCK:
                type = "F_UNLCK";
                break;
        case F_RDLCK:
                type = "F_RDLCK";
                break;
        case F_WRLCK:
                type = "F_WRLCK";
                break;
        default:
                type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Call lock_print(s, t, f) when d is non-zero.  Wrapped in do/while (0) so
 * that the expansion is a single statement: a following "else" at the call
 * site can no longer attach to the macro's own "if".
 */
#define LOCK_PRINT(d, s, t, f)  \
        do { \
                if (d) \
                        lock_print((s), (t), (f)); \
        } while (0)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6318 
/*
 * Credential check hook.  None of the arguments is examined: every
 * caller is currently accepted and TRUE is returned unconditionally.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
        return (TRUE);
}
6325 
6326 /*
6327  * Look up the pathname using the vp in cs as the directory vnode.
6328  * cs->vp will be the vnode for the file on success
6329  */
6330 
6331 static nfsstat4
6332 rfs4_lookup(component4 *component, struct svc_req *req,
6333     struct compound_state *cs)
6334 {
6335         char *nm;
6336         uint32_t len;
6337         nfsstat4 status;
6338         struct sockaddr *ca;
6339         char *name;
6340 
6341         if (cs->vp == NULL) {
6342                 return (NFS4ERR_NOFILEHANDLE);
6343         }
6344         if (cs->vp->v_type != VDIR) {
6345                 return (NFS4ERR_NOTDIR);
6346         }
6347 
6348         status = utf8_dir_verify(component);
6349         if (status != NFS4_OK)
6350                 return (status);
6351 
6352         nm = utf8_to_fn(component, &len, NULL);
6353         if (nm == NULL) {
6354                 return (NFS4ERR_INVAL);
6355         }
6356 
6357         if (len > MAXNAMELEN) {
6358                 kmem_free(nm, len);
6359                 return (NFS4ERR_NAMETOOLONG);
6360         }
6361 
6362         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6363         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6364             MAXPATHLEN + 1);
6365 
6366         if (name == NULL) {
6367                 kmem_free(nm, len);
6368                 return (NFS4ERR_INVAL);
6369         }
6370 
6371         status = do_rfs4_op_lookup(name, req, cs);
6372 
6373         if (name != nm)
6374                 kmem_free(name, MAXPATHLEN + 1);
6375 
6376         kmem_free(nm, len);
6377 
6378         return (status);
6379 }
6380 
6381 static nfsstat4
6382 rfs4_lookupfile(component4 *component, struct svc_req *req,
6383     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6384 {
6385         nfsstat4 status;
6386         vnode_t *dvp = cs->vp;
6387         vattr_t bva, ava, fva;
6388         int error;
6389 
6390         /* Get "before" change value */
6391         bva.va_mask = AT_CTIME|AT_SEQ;
6392         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6393         if (error)
6394                 return (puterrno4(error));
6395 
6396         /* rfs4_lookup may VN_RELE directory */
6397         VN_HOLD(dvp);
6398 
6399         status = rfs4_lookup(component, req, cs);
6400         if (status != NFS4_OK) {
6401                 VN_RELE(dvp);
6402                 return (status);
6403         }
6404 
6405         /*
6406          * Get "after" change value, if it fails, simply return the
6407          * before value.
6408          */
6409         ava.va_mask = AT_CTIME|AT_SEQ;
6410         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6411                 ava.va_ctime = bva.va_ctime;
6412                 ava.va_seq = 0;
6413         }
6414         VN_RELE(dvp);
6415 
6416         /*
6417          * Validate the file is a file
6418          */
6419         fva.va_mask = AT_TYPE|AT_MODE;
6420         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6421         if (error)
6422                 return (puterrno4(error));
6423 
6424         if (fva.va_type != VREG) {
6425                 if (fva.va_type == VDIR)
6426                         return (NFS4ERR_ISDIR);
6427                 if (fva.va_type == VLNK)
6428                         return (NFS4ERR_SYMLINK);
6429                 return (NFS4ERR_INVAL);
6430         }
6431 
6432         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6433         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6434 
6435         /*
6436          * It is undefined if VOP_LOOKUP will change va_seq, so
6437          * cinfo.atomic = TRUE only if we have
6438          * non-zero va_seq's, and they have not changed.
6439          */
6440         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6441                 cinfo->atomic = TRUE;
6442         else
6443                 cinfo->atomic = FALSE;
6444 
6445         /* Check for mandatory locking */
6446         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6447         return (check_open_access(access, cs, req));
6448 }
6449 
6450 static nfsstat4
6451 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6452     cred_t *cr, vnode_t **vpp, bool_t *created)
6453 {
6454         int error;
6455         nfsstat4 status = NFS4_OK;
6456         vattr_t va;
6457 
6458 tryagain:
6459 
6460         /*
6461          * The file open mode used is VWRITE.  If the client needs
6462          * some other semantic, then it should do the access checking
6463          * itself.  It would have been nice to have the file open mode
6464          * passed as part of the arguments.
6465          */
6466 
6467         *created = TRUE;
6468         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6469 
6470         if (error) {
6471                 *created = FALSE;
6472 
6473                 /*
6474                  * If we got something other than file already exists
6475                  * then just return this error.  Otherwise, we got
6476                  * EEXIST.  If we were doing a GUARDED create, then
6477                  * just return this error.  Otherwise, we need to
6478                  * make sure that this wasn't a duplicate of an
6479                  * exclusive create request.
6480                  *
6481                  * The assumption is made that a non-exclusive create
6482                  * request will never return EEXIST.
6483                  */
6484 
6485                 if (error != EEXIST || mode == GUARDED4) {
6486                         status = puterrno4(error);
6487                         return (status);
6488                 }
6489                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6490                     NULL, NULL, NULL);
6491 
6492                 if (error) {
6493                         /*
6494                          * We couldn't find the file that we thought that
6495                          * we just created.  So, we'll just try creating
6496                          * it again.
6497                          */
6498                         if (error == ENOENT)
6499                                 goto tryagain;
6500 
6501                         status = puterrno4(error);
6502                         return (status);
6503                 }
6504 
6505                 if (mode == UNCHECKED4) {
6506                         /* existing object must be regular file */
6507                         if ((*vpp)->v_type != VREG) {
6508                                 if ((*vpp)->v_type == VDIR)
6509                                         status = NFS4ERR_ISDIR;
6510                                 else if ((*vpp)->v_type == VLNK)
6511                                         status = NFS4ERR_SYMLINK;
6512                                 else
6513                                         status = NFS4ERR_INVAL;
6514                                 VN_RELE(*vpp);
6515                                 return (status);
6516                         }
6517 
6518                         return (NFS4_OK);
6519                 }
6520 
6521                 /* Check for duplicate request */
6522                 va.va_mask = AT_MTIME;
6523                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6524                 if (!error) {
6525                         /* We found the file */
6526                         const timestruc_t *mtime = &vap->va_mtime;
6527 
6528                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6529                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6530                                 /* but its not our creation */
6531                                 VN_RELE(*vpp);
6532                                 return (NFS4ERR_EXIST);
6533                         }
6534                         *created = TRUE; /* retrans of create == created */
6535                         return (NFS4_OK);
6536                 }
6537                 VN_RELE(*vpp);
6538                 return (NFS4ERR_EXIST);
6539         }
6540 
6541         return (NFS4_OK);
6542 }
6543 
6544 static nfsstat4
6545 check_open_access(uint32_t access, struct compound_state *cs,
6546     struct svc_req *req)
6547 {
6548         int error;
6549         vnode_t *vp;
6550         bool_t readonly;
6551         cred_t *cr = cs->cr;
6552 
6553         /* For now we don't allow mandatory locking as per V2/V3 */
6554         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6555                 return (NFS4ERR_ACCESS);
6556         }
6557 
6558         vp = cs->vp;
6559         ASSERT(cr != NULL && vp->v_type == VREG);
6560 
6561         /*
6562          * If the file system is exported read only and we are trying
6563          * to open for write, then return NFS4ERR_ROFS
6564          */
6565 
6566         readonly = rdonly4(req, cs);
6567 
6568         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6569                 return (NFS4ERR_ROFS);
6570 
6571         if (access & OPEN4_SHARE_ACCESS_READ) {
6572                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6573                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6574                         return (NFS4ERR_ACCESS);
6575                 }
6576         }
6577 
6578         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6579                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6580                 if (error)
6581                         return (NFS4ERR_ACCESS);
6582         }
6583 
6584         return (NFS4_OK);
6585 }
6586 
6587 static nfsstat4
6588 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6589     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6590 {
6591         struct nfs4_svgetit_arg sarg;
6592         struct nfs4_ntov_table ntov;
6593 
6594         bool_t ntov_table_init = FALSE;
6595         struct statvfs64 sb;
6596         nfsstat4 status;
6597         vnode_t *vp;
6598         vattr_t bva, ava, iva, cva, *vap;
6599         vnode_t *dvp;
6600         timespec32_t *mtime;
6601         char *nm = NULL;
6602         uint_t buflen;
6603         bool_t created;
6604         bool_t setsize = FALSE;
6605         len_t reqsize;
6606         int error;
6607         bool_t trunc;
6608         caller_context_t ct;
6609         component4 *component;
6610         bslabel_t *clabel;
6611         struct sockaddr *ca;
6612         char *name = NULL;
6613 
6614         sarg.sbp = &sb;
6615         sarg.is_referral = B_FALSE;
6616 
6617         dvp = cs->vp;
6618 
6619         /* Check if the file system is read only */
6620         if (rdonly4(req, cs))
6621                 return (NFS4ERR_ROFS);
6622 
6623         /* check the label of including directory */
6624         if (is_system_labeled()) {
6625                 ASSERT(req->rq_label != NULL);
6626                 clabel = req->rq_label;
6627                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6628                     "got client label from request(1)",
6629                     struct svc_req *, req);
6630                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6631                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6632                             cs->exi)) {
6633                                 return (NFS4ERR_ACCESS);
6634                         }
6635                 }
6636         }
6637 
6638         /*
6639          * Get the last component of path name in nm. cs will reference
6640          * the including directory on success.
6641          */
6642         component = &args->open_claim4_u.file;
6643         status = utf8_dir_verify(component);
6644         if (status != NFS4_OK)
6645                 return (status);
6646 
6647         nm = utf8_to_fn(component, &buflen, NULL);
6648 
6649         if (nm == NULL)
6650                 return (NFS4ERR_RESOURCE);
6651 
6652         if (buflen > MAXNAMELEN) {
6653                 kmem_free(nm, buflen);
6654                 return (NFS4ERR_NAMETOOLONG);
6655         }
6656 
6657         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6658         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6659         if (error) {
6660                 kmem_free(nm, buflen);
6661                 return (puterrno4(error));
6662         }
6663 
6664         if (bva.va_type != VDIR) {
6665                 kmem_free(nm, buflen);
6666                 return (NFS4ERR_NOTDIR);
6667         }
6668 
6669         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6670 
6671         switch (args->mode) {
6672         case GUARDED4:
6673                 /*FALLTHROUGH*/
6674         case UNCHECKED4:
6675                 nfs4_ntov_table_init(&ntov);
6676                 ntov_table_init = TRUE;
6677 
6678                 *attrset = 0;
6679                 status = do_rfs4_set_attrs(attrset,
6680                     &args->createhow4_u.createattrs,
6681                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6682 
6683                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6684                     sarg.vap->va_type != VREG) {
6685                         if (sarg.vap->va_type == VDIR)
6686                                 status = NFS4ERR_ISDIR;
6687                         else if (sarg.vap->va_type == VLNK)
6688                                 status = NFS4ERR_SYMLINK;
6689                         else
6690                                 status = NFS4ERR_INVAL;
6691                 }
6692 
6693                 if (status != NFS4_OK) {
6694                         kmem_free(nm, buflen);
6695                         nfs4_ntov_table_free(&ntov, &sarg);
6696                         *attrset = 0;
6697                         return (status);
6698                 }
6699 
6700                 vap = sarg.vap;
6701                 vap->va_type = VREG;
6702                 vap->va_mask |= AT_TYPE;
6703 
6704                 if ((vap->va_mask & AT_MODE) == 0) {
6705                         vap->va_mask |= AT_MODE;
6706                         vap->va_mode = (mode_t)0600;
6707                 }
6708 
6709                 if (vap->va_mask & AT_SIZE) {
6710 
6711                         /* Disallow create with a non-zero size */
6712 
6713                         if ((reqsize = sarg.vap->va_size) != 0) {
6714                                 kmem_free(nm, buflen);
6715                                 nfs4_ntov_table_free(&ntov, &sarg);
6716                                 *attrset = 0;
6717                                 return (NFS4ERR_INVAL);
6718                         }
6719                         setsize = TRUE;
6720                 }
6721                 break;
6722 
6723         case EXCLUSIVE4:
6724                 /* prohibit EXCL create of named attributes */
6725                 if (dvp->v_flag & V_XATTRDIR) {
6726                         kmem_free(nm, buflen);
6727                         *attrset = 0;
6728                         return (NFS4ERR_INVAL);
6729                 }
6730 
6731                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6732                 cva.va_type = VREG;
6733                 /*
6734                  * Ensure no time overflows. Assumes underlying
6735                  * filesystem supports at least 32 bits.
6736                  * Truncate nsec to usec resolution to allow valid
6737                  * compares even if the underlying filesystem truncates.
6738                  */
6739                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6740                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6741                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6742                 cva.va_mode = (mode_t)0;
6743                 vap = &cva;
6744 
6745                 /*
6746                  * For EXCL create, attrset is set to the server attr
6747                  * used to cache the client's verifier.
6748                  */
6749                 *attrset = FATTR4_TIME_MODIFY_MASK;
6750                 break;
6751         }
6752 
6753         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6754         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6755             MAXPATHLEN  + 1);
6756 
6757         if (name == NULL) {
6758                 kmem_free(nm, buflen);
6759                 return (NFS4ERR_SERVERFAULT);
6760         }
6761 
6762         status = create_vnode(dvp, name, vap, args->mode,
6763             cs->cr, &vp, &created);
6764         if (nm != name)
6765                 kmem_free(name, MAXPATHLEN + 1);
6766         kmem_free(nm, buflen);
6767 
6768         if (status != NFS4_OK) {
6769                 if (ntov_table_init)
6770                         nfs4_ntov_table_free(&ntov, &sarg);
6771                 *attrset = 0;
6772                 return (status);
6773         }
6774 
6775         trunc = (setsize && !created);
6776 
6777         if (args->mode != EXCLUSIVE4) {
6778                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6779 
6780                 /*
6781                  * True verification that object was created with correct
6782                  * attrs is impossible.  The attrs could have been changed
6783                  * immediately after object creation.  If attributes did
6784                  * not verify, the only recourse for the server is to
6785                  * destroy the object.  Maybe if some attrs (like gid)
6786                  * are set incorrectly, the object should be destroyed;
6787                  * however, seems bad as a default policy.  Do we really
6788                  * want to destroy an object over one of the times not
6789                  * verifying correctly?  For these reasons, the server
6790                  * currently sets bits in attrset for createattrs
6791                  * that were set; however, no verification is done.
6792                  *
6793                  * vmask_to_nmask accounts for vattr bits set on create
6794                  *      [do_rfs4_set_attrs() only sets resp bits for
6795                  *       non-vattr/vfs bits.]
6796                  * Mask off any bits we set by default so as not to return
6797                  * more attrset bits than were requested in createattrs
6798                  */
6799                 if (created) {
6800                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6801                         *attrset &= createmask;
6802                 } else {
6803                         /*
6804                          * We did not create the vnode (we tried but it
6805                          * already existed).  In this case, the only createattr
6806                          * that the spec allows the server to set is size,
6807                          * and even then, it can only be set if it is 0.
6808                          */
6809                         *attrset = 0;
6810                         if (trunc)
6811                                 *attrset = FATTR4_SIZE_MASK;
6812                 }
6813         }
6814         if (ntov_table_init)
6815                 nfs4_ntov_table_free(&ntov, &sarg);
6816 
6817         /*
6818          * Get the initial "after" sequence number, if it fails,
6819          * set to zero, time to before.
6820          */
6821         iva.va_mask = AT_CTIME|AT_SEQ;
6822         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6823                 iva.va_seq = 0;
6824                 iva.va_ctime = bva.va_ctime;
6825         }
6826 
6827         /*
6828          * create_vnode attempts to create the file exclusive,
6829          * if it already exists the VOP_CREATE will fail and
6830          * may not increase va_seq. It is atomic if
6831          * we haven't changed the directory, but if it has changed
6832          * we don't know what changed it.
6833          */
6834         if (!created) {
6835                 if (bva.va_seq && iva.va_seq &&
6836                     bva.va_seq == iva.va_seq)
6837                         cinfo->atomic = TRUE;
6838                 else
6839                         cinfo->atomic = FALSE;
6840                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6841         } else {
6842                 /*
6843                  * The entry was created, we need to sync the
6844                  * directory metadata.
6845                  */
6846                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6847 
6848                 /*
6849                  * Get "after" change value, if it fails, simply return the
6850                  * before value.
6851                  */
6852                 ava.va_mask = AT_CTIME|AT_SEQ;
6853                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6854                         ava.va_ctime = bva.va_ctime;
6855                         ava.va_seq = 0;
6856                 }
6857 
6858                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6859 
6860                 /*
6861                  * The cinfo->atomic = TRUE only if we have
6862                  * non-zero va_seq's, and it has incremented by exactly one
6863                  * during the create_vnode and it didn't
6864                  * change during the VOP_FSYNC.
6865                  */
6866                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6867                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6868                         cinfo->atomic = TRUE;
6869                 else
6870                         cinfo->atomic = FALSE;
6871         }
6872 
6873         /* Check for mandatory locking and that the size gets set. */
6874         cva.va_mask = AT_MODE;
6875         if (setsize)
6876                 cva.va_mask |= AT_SIZE;
6877 
6878         /* Assume the worst */
6879         cs->mandlock = TRUE;
6880 
6881         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6882                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6883 
6884                 /*
6885                  * Truncate the file if necessary; this would be
6886                  * the case for create over an existing file.
6887                  */
6888 
6889                 if (trunc) {
6890                         int in_crit = 0;
6891                         rfs4_file_t *fp;
6892                         nfs4_srv_t *nsrv4;
6893                         bool_t create = FALSE;
6894 
6895                         /*
6896                          * We are writing over an existing file.
6897                          * Check to see if we need to recall a delegation.
6898                          */
6899                         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6900                         rfs4_hold_deleg_policy(nsrv4);
6901                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6902                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6903                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6904                                         rfs4_file_rele(fp);
6905                                         rfs4_rele_deleg_policy(nsrv4);
6906                                         VN_RELE(vp);
6907                                         *attrset = 0;
6908                                         return (NFS4ERR_DELAY);
6909                                 }
6910                                 rfs4_file_rele(fp);
6911                         }
6912                         rfs4_rele_deleg_policy(nsrv4);
6913 
6914                         if (nbl_need_check(vp)) {
6915                                 in_crit = 1;
6916 
6917                                 ASSERT(reqsize == 0);
6918 
6919                                 nbl_start_crit(vp, RW_READER);
6920                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6921                                     cva.va_size, 0, NULL)) {
6922                                         in_crit = 0;
6923                                         nbl_end_crit(vp);
6924                                         VN_RELE(vp);
6925                                         *attrset = 0;
6926                                         return (NFS4ERR_ACCESS);
6927                                 }
6928                         }
6929                         ct.cc_sysid = 0;
6930                         ct.cc_pid = 0;
6931                         ct.cc_caller_id = nfs4_srv_caller_id;
6932                         ct.cc_flags = CC_DONTBLOCK;
6933 
6934                         cva.va_mask = AT_SIZE;
6935                         cva.va_size = reqsize;
6936                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6937                         if (in_crit)
6938                                 nbl_end_crit(vp);
6939                 }
6940         }
6941 
6942         error = makefh4(&cs->fh, vp, cs->exi);
6943 
6944         /*
6945          * Force modified data and metadata out to stable storage.
6946          */
6947         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6948 
6949         if (error) {
6950                 VN_RELE(vp);
6951                 *attrset = 0;
6952                 return (puterrno4(error));
6953         }
6954 
6955         /* if parent dir is attrdir, set namedattr fh flag */
6956         if (dvp->v_flag & V_XATTRDIR)
6957                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6958 
6959         if (cs->vp)
6960                 VN_RELE(cs->vp);
6961 
6962         cs->vp = vp;
6963 
6964         /*
6965          * if we did not create the file, we will need to check
6966          * the access bits on the file
6967          */
6968 
6969         if (!created) {
6970                 if (setsize)
6971                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6972                 status = check_open_access(args->share_access, cs, req);
6973                 if (status != NFS4_OK)
6974                         *attrset = 0;
6975         }
6976         return (status);
6977 }
6978 
/*
 * rfs4_do_open
 *
 * Common tail of OPEN processing, run once cs->vp and cs->fh refer to
 * the file being opened.  Finds (or creates) the file and open state
 * records, applies the share reservation, deals with a conflicting
 * delegation, opens or upgrades the vnode, updates the per-file share
 * counters and finally asks for a delegation.
 *
 *      cs         compound state; cs->vp / cs->fh identify the file
 *      req        RPC request (currently unused)
 *      oo         open owner performing the open
 *      deleg      delegation request type passed to
 *                 rfs4_grant_delegation()
 *      access     OPEN4_SHARE_ACCESS_* bits requested
 *      deny       OPEN4_SHARE_DENY_* bits requested
 *      resp       OPEN result; status, stateid and delegation are set
 *      deleg_cur  non-zero to skip VOP_OPEN() and only perform the
 *                 open upgrade
 *
 * The outcome is reported through resp->status; nothing is returned.
 */
/*ARGSUSED*/
static void
rfs4_do_open(struct compound_state *cs, struct svc_req *req,
    rfs4_openowner_t *oo, delegreq_t deleg,
    uint32_t access, uint32_t deny,
    OPEN4res *resp, int deleg_cur)
{
        /* XXX Currently not using req  */
        rfs4_state_t *sp;
        rfs4_file_t *fp;
        bool_t screate = TRUE;
        bool_t fcreate = TRUE;
        uint32_t open_a, share_a;
        uint32_t open_d, share_d;
        rfs4_deleg_state_t *dsp;
        sysid_t sysid;
        nfsstat4 status;
        caller_context_t ct;
        int fflags = 0;
        int recall = 0;
        int err;
        int first_open;

        /* get the file struct and hold a lock on it during initial open */
        fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
        if (fp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
                return;
        }

        /*
         * NOTE(review): rf_file_rwlock is released explicitly only in
         * the sp == NULL branch below; presumably it is dropped
         * elsewhere when the state lookup succeeds -- confirm against
         * rfs4_findstate_by_owner_file().
         */
        sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
        if (sp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
                /* No need to keep any reference */
                rw_exit(&fp->rf_file_rwlock);
                rfs4_file_rele(fp);
                return;
        }

        /* try to get the sysid before continuing */
        if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
                resp->status = status;
                rfs4_file_rele(fp);
                /* Not a fully formed open; "close" it */
                if (screate == TRUE)
                        rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                rfs4_state_rele(sp);
                return;
        }

        /* Calculate the fflags for this OPEN. */
        if (access & OPEN4_SHARE_ACCESS_READ)
                fflags |= FREAD;
        if (access & OPEN4_SHARE_ACCESS_WRITE)
                fflags |= FWRITE;

        rfs4_dbe_lock(sp->rs_dbe);

        /*
         * Calculate the new deny and access mode that this open is adding to
         * the file for this open owner;
         */
        open_d = (deny & ~sp->rs_open_deny);
        open_a = (access & ~sp->rs_open_access);

        /*
         * Calculate the new share access and share deny modes that this open
         * is adding to the file for this open owner;
         */
        share_a = (access & ~sp->rs_share_access);
        share_d = (deny & ~sp->rs_share_deny);

        /* No access bits recorded yet means this state was never opened. */
        first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;

        /*
         * Check to see the client has already sent an open for this
         * open owner on this file with the same share/deny modes.
         * If so, we don't need to check for a conflict and we don't
         * need to add another shrlock.  If not, then we need to
         * check for conflicts in deny and access before checking for
         * conflicts in delegation.  We don't want to recall a
         * delegation based on an open that will eventually fail based
         * on shares modes.
         */

        if (share_a || share_d) {
                if ((err = rfs4_share(sp, access, deny)) != 0) {
                        rfs4_dbe_unlock(sp->rs_dbe);
                        resp->status = err;

                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        return;
                }
        }

        /* From here on both sp->rs_dbe and fp->rf_dbe are held. */
        rfs4_dbe_lock(fp->rf_dbe);

        /*
         * Check to see if this file is delegated and if so, if a
         * recall needs to be done.
         */
        if (rfs4_check_recall(sp, access)) {
                /* Drop both locks across the recall and the delay. */
                rfs4_dbe_unlock(fp->rf_dbe);
                rfs4_dbe_unlock(sp->rs_dbe);
                rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
                delay(NFS4_DELEGATION_CONFLICT_DELAY);
                rfs4_dbe_lock(sp->rs_dbe);

                /* if state closed while lock was dropped */
                if (sp->rs_closed) {
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_OLD_STATEID;
                        return;
                }

                rfs4_dbe_lock(fp->rf_dbe);
                /* Let's see if the delegation was returned */
                if (rfs4_check_recall(sp, access)) {
                        /* Still delegated: undo the share, reply DELAY. */
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        rfs4_update_lease(sp->rs_owner->ro_client);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_DELAY;
                        return;
                }
        }
        /*
         * the share check passed and any delegation conflict has been
         * taken care of, now call vop_open.
         * if this is the first open then call vop_open with fflags.
         * if not, call vn_open_upgrade with just the upgrade flags.
         *
         * if the file has been opened already, it will have the current
         * access mode in the state struct.  if it has no share access, then
         * this is a new open.
         *
         * However, if this is open with CLAIM_DELEGATE_CUR, then don't
         * call VOP_OPEN(), just do the open upgrade.
         */
        if (first_open && !deleg_cur) {
                ct.cc_sysid = sysid;
                ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
                ct.cc_caller_id = nfs4_srv_caller_id;
                ct.cc_flags = CC_DONTBLOCK;
                err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
                if (err) {
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        /* check if a monitor detected a delegation conflict */
                        if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
                                resp->status = NFS4ERR_DELAY;
                        else
                                resp->status = NFS4ERR_SERVERFAULT;
                        return;
                }
        } else { /* open upgrade */
                /*
                 * calculate the fflags for the new mode that is being added
                 * by this upgrade.
                 */
                fflags = 0;
                if (open_a & OPEN4_SHARE_ACCESS_READ)
                        fflags |= FREAD;
                if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                        fflags |= FWRITE;
                vn_open_upgrade(cs->vp, fflags);
        }
        /* Record the accumulated modes on the open state ... */
        sp->rs_open_access |= access;
        sp->rs_open_deny |= deny;

        /* ... and count only the newly added bits on the file. */
        if (open_d & OPEN4_SHARE_DENY_READ)
                fp->rf_deny_read++;
        if (open_d & OPEN4_SHARE_DENY_WRITE)
                fp->rf_deny_write++;
        fp->rf_share_deny |= deny;

        if (open_a & OPEN4_SHARE_ACCESS_READ)
                fp->rf_access_read++;
        if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                fp->rf_access_write++;
        fp->rf_share_access |= access;

        /*
         * Check for delegation here. if the deleg argument is not
         * DELEG_ANY, then this is a reclaim from a client and
         * we must honor the delegation requested. If necessary we can
         * set the recall flag.
         */

        dsp = rfs4_grant_delegation(deleg, sp, &recall);

        cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);

        next_stateid(&sp->rs_stateid);

        resp->stateid = sp->rs_stateid.stateid;

        rfs4_dbe_unlock(fp->rf_dbe);
        rfs4_dbe_unlock(sp->rs_dbe);

        /* Describe the delegation, if one was granted, in the reply. */
        if (dsp) {
                rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
                rfs4_deleg_state_rele(dsp);
        }

        rfs4_file_rele(fp);
        rfs4_state_rele(sp);

        resp->status = NFS4_OK;
}
7217 
7218 /*ARGSUSED*/
7219 static void
7220 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7221     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7222 {
7223         change_info4 *cinfo = &resp->cinfo;
7224         bitmap4 *attrset = &resp->attrset;
7225 
7226         if (args->opentype == OPEN4_NOCREATE)
7227                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
7228                     req, cs, args->share_access, cinfo);
7229         else {
7230                 /* inhibit delegation grants during exclusive create */
7231 
7232                 if (args->mode == EXCLUSIVE4)
7233                         rfs4_disable_delegation();
7234 
7235                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7236                     oo->ro_client->rc_clientid);
7237         }
7238 
7239         if (resp->status == NFS4_OK) {
7240 
7241                 /* cs->vp cs->fh now reference the desired file */
7242 
7243                 rfs4_do_open(cs, req, oo,
7244                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7245                     args->share_access, args->share_deny, resp, 0);
7246 
7247                 /*
7248                  * If rfs4_createfile set attrset, we must
7249                  * clear this attrset before the response is copied.
7250                  */
7251                 if (resp->status != NFS4_OK && resp->attrset) {
7252                         resp->attrset = 0;
7253                 }
7254         }
7255         else
7256                 *cs->statusp = resp->status;
7257 
7258         if (args->mode == EXCLUSIVE4)
7259                 rfs4_enable_delegation();
7260 }
7261 
7262 /*ARGSUSED*/
7263 static void
7264 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7265     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7266 {
7267         change_info4 *cinfo = &resp->cinfo;
7268         vattr_t va;
7269         vtype_t v_type = cs->vp->v_type;
7270         int error = 0;
7271 
7272         /* Verify that we have a regular file */
7273         if (v_type != VREG) {
7274                 if (v_type == VDIR)
7275                         resp->status = NFS4ERR_ISDIR;
7276                 else if (v_type == VLNK)
7277                         resp->status = NFS4ERR_SYMLINK;
7278                 else
7279                         resp->status = NFS4ERR_INVAL;
7280                 return;
7281         }
7282 
7283         va.va_mask = AT_MODE|AT_UID;
7284         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7285         if (error) {
7286                 resp->status = puterrno4(error);
7287                 return;
7288         }
7289 
7290         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7291 
7292         /*
7293          * Check if we have access to the file, Note the the file
7294          * could have originally been open UNCHECKED or GUARDED
7295          * with mode bits that will now fail, but there is nothing
7296          * we can really do about that except in the case that the
7297          * owner of the file is the one requesting the open.
7298          */
7299         if (crgetuid(cs->cr) != va.va_uid) {
7300                 resp->status = check_open_access(args->share_access, cs, req);
7301                 if (resp->status != NFS4_OK) {
7302                         return;
7303                 }
7304         }
7305 
7306         /*
7307          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7308          */
7309         cinfo->before = 0;
7310         cinfo->after = 0;
7311         cinfo->atomic = FALSE;
7312 
7313         rfs4_do_open(cs, req, oo,
7314             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7315             args->share_access, args->share_deny, resp, 0);
7316 }
7317 
7318 static void
7319 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7320     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7321 {
7322         int error;
7323         nfsstat4 status;
7324         stateid4 stateid =
7325             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7326         rfs4_deleg_state_t *dsp;
7327 
7328         /*
7329          * Find the state info from the stateid and confirm that the
7330          * file is delegated.  If the state openowner is the same as
7331          * the supplied openowner we're done. If not, get the file
7332          * info from the found state info. Use that file info to
7333          * create the state for this lock owner. Note solaris doen't
7334          * really need the pathname to find the file. We may want to
7335          * lookup the pathname and make sure that the vp exist and
7336          * matches the vp in the file structure. However it is
7337          * possible that the pathname nolonger exists (local process
7338          * unlinks the file), so this may not be that useful.
7339          */
7340 
7341         status = rfs4_get_deleg_state(&stateid, &dsp);
7342         if (status != NFS4_OK) {
7343                 resp->status = status;
7344                 return;
7345         }
7346 
7347         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7348 
7349         /*
7350          * New lock owner, create state. Since this was probably called
7351          * in response to a CB_RECALL we set deleg to DELEG_NONE
7352          */
7353 
7354         ASSERT(cs->vp != NULL);
7355         VN_RELE(cs->vp);
7356         VN_HOLD(dsp->rds_finfo->rf_vp);
7357         cs->vp = dsp->rds_finfo->rf_vp;
7358 
7359         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7360                 rfs4_deleg_state_rele(dsp);
7361                 *cs->statusp = resp->status = puterrno4(error);
7362                 return;
7363         }
7364 
7365         /* Mark progress for delegation returns */
7366         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7367         rfs4_deleg_state_rele(dsp);
7368         rfs4_do_open(cs, req, oo, DELEG_NONE,
7369             args->share_access, args->share_deny, resp, 1);
7370 }
7371 
7372 /*ARGSUSED*/
7373 static void
7374 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7375     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7376 {
7377         /*
7378          * Lookup the pathname, it must already exist since this file
7379          * was delegated.
7380          *
7381          * Find the file and state info for this vp and open owner pair.
7382          *      check that they are in fact delegated.
7383          *      check that the state access and deny modes are the same.
7384          *
7385          * Return the delgation possibly seting the recall flag.
7386          */
7387         rfs4_file_t *fp;
7388         rfs4_state_t *sp;
7389         bool_t create = FALSE;
7390         bool_t dcreate = FALSE;
7391         rfs4_deleg_state_t *dsp;
7392         nfsace4 *ace;
7393 
7394         /* Note we ignore oflags */
7395         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7396             req, cs, args->share_access, &resp->cinfo);
7397 
7398         if (resp->status != NFS4_OK) {
7399                 return;
7400         }
7401 
7402         /* get the file struct and hold a lock on it during initial open */
7403         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7404         if (fp == NULL) {
7405                 resp->status = NFS4ERR_RESOURCE;
7406                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7407                 return;
7408         }
7409 
7410         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7411         if (sp == NULL) {
7412                 resp->status = NFS4ERR_SERVERFAULT;
7413                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7414                 rw_exit(&fp->rf_file_rwlock);
7415                 rfs4_file_rele(fp);
7416                 return;
7417         }
7418 
7419         rfs4_dbe_lock(sp->rs_dbe);
7420         rfs4_dbe_lock(fp->rf_dbe);
7421         if (args->share_access != sp->rs_share_access ||
7422             args->share_deny != sp->rs_share_deny ||
7423             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7424                 NFS4_DEBUG(rfs4_debug,
7425                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7426                 rfs4_dbe_unlock(fp->rf_dbe);
7427                 rfs4_dbe_unlock(sp->rs_dbe);
7428                 rfs4_file_rele(fp);
7429                 rfs4_state_rele(sp);
7430                 resp->status = NFS4ERR_SERVERFAULT;
7431                 return;
7432         }
7433         rfs4_dbe_unlock(fp->rf_dbe);
7434         rfs4_dbe_unlock(sp->rs_dbe);
7435 
7436         dsp = rfs4_finddeleg(sp, &dcreate);
7437         if (dsp == NULL) {
7438                 rfs4_state_rele(sp);
7439                 rfs4_file_rele(fp);
7440                 resp->status = NFS4ERR_SERVERFAULT;
7441                 return;
7442         }
7443 
7444         next_stateid(&sp->rs_stateid);
7445 
7446         resp->stateid = sp->rs_stateid.stateid;
7447 
7448         resp->delegation.delegation_type = dsp->rds_dtype;
7449 
7450         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7451                 open_read_delegation4 *rv =
7452                     &resp->delegation.open_delegation4_u.read;
7453 
7454                 rv->stateid = dsp->rds_delegid.stateid;
7455                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7456                 ace = &rv->permissions;
7457         } else {
7458                 open_write_delegation4 *rv =
7459                     &resp->delegation.open_delegation4_u.write;
7460 
7461                 rv->stateid = dsp->rds_delegid.stateid;
7462                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7463                 ace = &rv->permissions;
7464                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7465                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7466         }
7467 
7468         /* XXX For now */
7469         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7470         ace->flag = 0;
7471         ace->access_mask = 0;
7472         ace->who.utf8string_len = 0;
7473         ace->who.utf8string_val = 0;
7474 
7475         rfs4_deleg_state_rele(dsp);
7476         rfs4_state_rele(sp);
7477         rfs4_file_rele(fp);
7478 }
7479 
/*
 * Result of comparing a request's sequence id against the sequence id
 * and last reply recorded for an owner.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same operation as last reply */
	NFS4_CHKSEQ_BAD		/* any other seqid, or seqid reused by another op */
} rfs4_chkseq_t;
7485 
7486 /*
7487  * Generic function for sequence number checks.
7488  */
7489 static rfs4_chkseq_t
7490 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7491     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7492 {
7493         /* Same sequence ids and matching operations? */
7494         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7495                 if (copyres == TRUE) {
7496                         rfs4_free_reply(resop);
7497                         rfs4_copy_reply(resop, lastop);
7498                 }
7499                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7500                     "Replayed SEQID %d\n", seqid));
7501                 return (NFS4_CHKSEQ_REPLAY);
7502         }
7503 
7504         /* If the incoming sequence is not the next expected then it is bad */
7505         if (rqst_seq != seqid + 1) {
7506                 if (rqst_seq == seqid) {
7507                         NFS4_DEBUG(rfs4_debug,
7508                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7509                             "but last op was %d current op is %d\n",
7510                             lastop->resop, resop->resop));
7511                         return (NFS4_CHKSEQ_BAD);
7512                 }
7513                 NFS4_DEBUG(rfs4_debug,
7514                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7515                     rqst_seq, seqid));
7516                 return (NFS4_CHKSEQ_BAD);
7517         }
7518 
7519         /* Everything okay -- next expected */
7520         return (NFS4_CHKSEQ_OKAY);
7521 }
7522 
7523 
7524 static rfs4_chkseq_t
7525 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7526 {
7527         rfs4_chkseq_t rc;
7528 
7529         rfs4_dbe_lock(op->ro_dbe);
7530         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7531             TRUE);
7532         rfs4_dbe_unlock(op->ro_dbe);
7533 
7534         if (rc == NFS4_CHKSEQ_OKAY)
7535                 rfs4_update_lease(op->ro_client);
7536 
7537         return (rc);
7538 }
7539 
7540 static rfs4_chkseq_t
7541 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7542 {
7543         rfs4_chkseq_t rc;
7544 
7545         rfs4_dbe_lock(op->ro_dbe);
7546         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7547             olo_seqid, resop, FALSE);
7548         rfs4_dbe_unlock(op->ro_dbe);
7549 
7550         return (rc);
7551 }
7552 
7553 static rfs4_chkseq_t
7554 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7555 {
7556         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7557 
7558         rfs4_dbe_lock(lsp->rls_dbe);
7559         if (!lsp->rls_skip_seqid_check)
7560                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7561                     resop, TRUE);
7562         rfs4_dbe_unlock(lsp->rls_dbe);
7563 
7564         return (rc);
7565 }
7566 
7567 static void
7568 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7569     struct svc_req *req, struct compound_state *cs)
7570 {
7571         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7572         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7573         open_owner4 *owner = &args->owner;
7574         open_claim_type4 claim = args->claim;
7575         rfs4_client_t *cp;
7576         rfs4_openowner_t *oo;
7577         bool_t create;
7578         bool_t replay = FALSE;
7579         int can_reclaim;
7580 
7581         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7582             OPEN4args *, args);
7583 
7584         if (cs->vp == NULL) {
7585                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7586                 goto end;
7587         }
7588 
7589         /*
7590          * Need to check clientid and lease expiration first based on
7591          * error ordering and incrementing sequence id.
7592          */
7593         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7594         if (cp == NULL) {
7595                 *cs->statusp = resp->status =
7596                     rfs4_check_clientid(&owner->clientid, 0);
7597                 goto end;
7598         }
7599 
7600         if (rfs4_lease_expired(cp)) {
7601                 rfs4_client_close(cp);
7602                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7603                 goto end;
7604         }
7605         can_reclaim = cp->rc_can_reclaim;
7606 
7607         /*
7608          * Find the open_owner for use from this point forward.  Take
7609          * care in updating the sequence id based on the type of error
7610          * being returned.
7611          */
7612 retry:
7613         create = TRUE;
7614         oo = rfs4_findopenowner(owner, &create, args->seqid);
7615         if (oo == NULL) {
7616                 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7617                 rfs4_client_rele(cp);
7618                 goto end;
7619         }
7620 
7621         /* Hold off access to the sequence space while the open is done */
7622         rfs4_sw_enter(&oo->ro_sw);
7623 
7624         /*
7625          * If the open_owner existed before at the server, then check
7626          * the sequence id.
7627          */
7628         if (!create && !oo->ro_postpone_confirm) {
7629                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7630                 case NFS4_CHKSEQ_BAD:
7631                         if ((args->seqid > oo->ro_open_seqid) &&
7632                             oo->ro_need_confirm) {
7633                                 rfs4_free_opens(oo, TRUE, FALSE);
7634                                 rfs4_sw_exit(&oo->ro_sw);
7635                                 rfs4_openowner_rele(oo);
7636                                 goto retry;
7637                         }
7638                         resp->status = NFS4ERR_BAD_SEQID;
7639                         goto out;
7640                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7641                         replay = TRUE;
7642                         goto out;
7643                 default:
7644                         break;
7645                 }
7646 
7647                 /*
7648                  * Sequence was ok and open owner exists
7649                  * check to see if we have yet to see an
7650                  * open_confirm.
7651                  */
7652                 if (oo->ro_need_confirm) {
7653                         rfs4_free_opens(oo, TRUE, FALSE);
7654                         rfs4_sw_exit(&oo->ro_sw);
7655                         rfs4_openowner_rele(oo);
7656                         goto retry;
7657                 }
7658         }
7659         /* Grace only applies to regular-type OPENs */
7660         if (rfs4_clnt_in_grace(cp) &&
7661             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7662                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7663                 goto out;
7664         }
7665 
7666         /*
7667          * If previous state at the server existed then can_reclaim
7668          * will be set. If not reply NFS4ERR_NO_GRACE to the
7669          * client.
7670          */
7671         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7672                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7673                 goto out;
7674         }
7675 
7676 
7677         /*
7678          * Reject the open if the client has missed the grace period
7679          */
7680         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7681                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7682                 goto out;
7683         }
7684 
7685         /* Couple of up-front bookkeeping items */
7686         if (oo->ro_need_confirm) {
7687                 /*
7688                  * If this is a reclaim OPEN then we should not ask
7689                  * for a confirmation of the open_owner per the
7690                  * protocol specification.
7691                  */
7692                 if (claim == CLAIM_PREVIOUS)
7693                         oo->ro_need_confirm = FALSE;
7694                 else
7695                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7696         }
7697         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7698 
7699         /*
7700          * If there is an unshared filesystem mounted on this vnode,
7701          * do not allow to open/create in this directory.
7702          */
7703         if (vn_ismntpt(cs->vp)) {
7704                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7705                 goto out;
7706         }
7707 
7708         /*
7709          * access must READ, WRITE, or BOTH.  No access is invalid.
7710          * deny can be READ, WRITE, BOTH, or NONE.
7711          * bits not defined for access/deny are invalid.
7712          */
7713         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7714             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7715             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7716                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7717                 goto out;
7718         }
7719 
7720 
7721         /*
7722          * make sure attrset is zero before response is built.
7723          */
7724         resp->attrset = 0;
7725 
7726         switch (claim) {
7727         case CLAIM_NULL:
7728                 rfs4_do_opennull(cs, req, args, oo, resp);
7729                 break;
7730         case CLAIM_PREVIOUS:
7731                 rfs4_do_openprev(cs, req, args, oo, resp);
7732                 break;
7733         case CLAIM_DELEGATE_CUR:
7734                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7735                 break;
7736         case CLAIM_DELEGATE_PREV:
7737                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7738                 break;
7739         default:
7740                 resp->status = NFS4ERR_INVAL;
7741                 break;
7742         }
7743 
7744 out:
7745         rfs4_client_rele(cp);
7746 
7747         /* Catch sequence id handling here to make it a little easier */
7748         switch (resp->status) {
7749         case NFS4ERR_BADXDR:
7750         case NFS4ERR_BAD_SEQID:
7751         case NFS4ERR_BAD_STATEID:
7752         case NFS4ERR_NOFILEHANDLE:
7753         case NFS4ERR_RESOURCE:
7754         case NFS4ERR_STALE_CLIENTID:
7755         case NFS4ERR_STALE_STATEID:
7756                 /*
7757                  * The protocol states that if any of these errors are
7758                  * being returned, the sequence id should not be
7759                  * incremented.  Any other return requires an
7760                  * increment.
7761                  */
7762                 break;
7763         default:
7764                 /* Always update the lease in this case */
7765                 rfs4_update_lease(oo->ro_client);
7766 
7767                 /* Regular response - copy the result */
7768                 if (!replay)
7769                         rfs4_update_open_resp(oo, resop, &cs->fh);
7770 
7771                 /*
7772                  * REPLAY case: Only if the previous response was OK
7773                  * do we copy the filehandle.  If not OK, no
7774                  * filehandle to copy.
7775                  */
7776                 if (replay == TRUE &&
7777                     resp->status == NFS4_OK &&
7778                     oo->ro_reply_fh.nfs_fh4_val) {
7779                         /*
7780                          * If this is a replay, we must restore the
7781                          * current filehandle/vp to that of what was
7782                          * returned originally.  Try our best to do
7783                          * it.
7784                          */
7785                         nfs_fh4_fmt_t *fh_fmtp =
7786                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7787 
7788                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7789                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7790 
7791                         if (cs->exi == NULL) {
7792                                 resp->status = NFS4ERR_STALE;
7793                                 goto finish;
7794                         }
7795 
7796                         VN_RELE(cs->vp);
7797 
7798                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7799                             &resp->status);
7800 
7801                         if (cs->vp == NULL)
7802                                 goto finish;
7803 
7804                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7805                 }
7806 
7807                 /*
7808                  * If this was a replay, no need to update the
7809                  * sequence id. If the open_owner was not created on
7810                  * this pass, then update.  The first use of an
7811                  * open_owner will not bump the sequence id.
7812                  */
7813                 if (replay == FALSE && !create)
7814                         rfs4_update_open_sequence(oo);
7815                 /*
7816                  * If the client is receiving an error and the
7817                  * open_owner needs to be confirmed, there is no way
7818                  * to notify the client of this fact ignoring the fact
7819                  * that the server has no method of returning a
7820                  * stateid to confirm.  Therefore, the server needs to
7821                  * mark this open_owner in a way as to avoid the
7822                  * sequence id checking the next time the client uses
7823                  * this open_owner.
7824                  */
7825                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7826                         oo->ro_postpone_confirm = TRUE;
7827                 /*
7828                  * If OK response then clear the postpone flag and
7829                  * reset the sequence id to keep in sync with the
7830                  * client.
7831                  */
7832                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7833                         oo->ro_postpone_confirm = FALSE;
7834                         oo->ro_open_seqid = args->seqid;
7835                 }
7836                 break;
7837         }
7838 
7839 finish:
7840         *cs->statusp = resp->status;
7841 
7842         rfs4_sw_exit(&oo->ro_sw);
7843         rfs4_openowner_rele(oo);
7844 
7845 end:
7846         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7847             OPEN4res *, resp);
7848 }
7849 
7850 /*ARGSUSED*/
7851 void
7852 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7853     struct svc_req *req, struct compound_state *cs)
7854 {
7855         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7856         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7857         rfs4_state_t *sp;
7858         nfsstat4 status;
7859 
7860         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7861             OPEN_CONFIRM4args *, args);
7862 
7863         if (cs->vp == NULL) {
7864                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7865                 goto out;
7866         }
7867 
7868         if (cs->vp->v_type != VREG) {
7869                 *cs->statusp = resp->status =
7870                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7871                 return;
7872         }
7873 
7874         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7875         if (status != NFS4_OK) {
7876                 *cs->statusp = resp->status = status;
7877                 goto out;
7878         }
7879 
7880         /* Ensure specified filehandle matches */
7881         if (cs->vp != sp->rs_finfo->rf_vp) {
7882                 rfs4_state_rele(sp);
7883                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7884                 goto out;
7885         }
7886 
7887         /* hold off other access to open_owner while we tinker */
7888         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7889 
7890         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7891         case NFS4_CHECK_STATEID_OKAY:
7892                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7893                     resop) != 0) {
7894                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7895                         break;
7896                 }
7897                 /*
7898                  * If it is the appropriate stateid and determined to
7899                  * be "OKAY" then this means that the stateid does not
7900                  * need to be confirmed and the client is in error for
7901                  * sending an OPEN_CONFIRM.
7902                  */
7903                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7904                 break;
7905         case NFS4_CHECK_STATEID_OLD:
7906                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7907                 break;
7908         case NFS4_CHECK_STATEID_BAD:
7909                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7910                 break;
7911         case NFS4_CHECK_STATEID_EXPIRED:
7912                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7913                 break;
7914         case NFS4_CHECK_STATEID_CLOSED:
7915                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7916                 break;
7917         case NFS4_CHECK_STATEID_REPLAY:
7918                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7919                     resop)) {
7920                 case NFS4_CHKSEQ_OKAY:
7921                         /*
7922                          * This is replayed stateid; if seqid matches
7923                          * next expected, then client is using wrong seqid.
7924                          */
7925                         /* fall through */
7926                 case NFS4_CHKSEQ_BAD:
7927                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7928                         break;
7929                 case NFS4_CHKSEQ_REPLAY:
7930                         /*
7931                          * Note this case is the duplicate case so
7932                          * resp->status is already set.
7933                          */
7934                         *cs->statusp = resp->status;
7935                         rfs4_update_lease(sp->rs_owner->ro_client);
7936                         break;
7937                 }
7938                 break;
7939         case NFS4_CHECK_STATEID_UNCONFIRMED:
7940                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7941                     resop) != NFS4_CHKSEQ_OKAY) {
7942                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7943                         break;
7944                 }
7945                 *cs->statusp = resp->status = NFS4_OK;
7946 
7947                 next_stateid(&sp->rs_stateid);
7948                 resp->open_stateid = sp->rs_stateid.stateid;
7949                 sp->rs_owner->ro_need_confirm = FALSE;
7950                 rfs4_update_lease(sp->rs_owner->ro_client);
7951                 rfs4_update_open_sequence(sp->rs_owner);
7952                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7953                 break;
7954         default:
7955                 ASSERT(FALSE);
7956                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7957                 break;
7958         }
7959         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7960         rfs4_state_rele(sp);
7961 
7962 out:
7963         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7964             OPEN_CONFIRM4res *, resp);
7965 }
7966 
7967 /*ARGSUSED*/
7968 void
7969 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7970     struct svc_req *req, struct compound_state *cs)
7971 {
7972         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7973         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7974         uint32_t access = args->share_access;
7975         uint32_t deny = args->share_deny;
7976         nfsstat4 status;
7977         rfs4_state_t *sp;
7978         rfs4_file_t *fp;
7979         int fflags = 0;
7980 
7981         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7982             OPEN_DOWNGRADE4args *, args);
7983 
7984         if (cs->vp == NULL) {
7985                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7986                 goto out;
7987         }
7988 
7989         if (cs->vp->v_type != VREG) {
7990                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7991                 return;
7992         }
7993 
7994         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7995         if (status != NFS4_OK) {
7996                 *cs->statusp = resp->status = status;
7997                 goto out;
7998         }
7999 
8000         /* Ensure specified filehandle matches */
8001         if (cs->vp != sp->rs_finfo->rf_vp) {
8002                 rfs4_state_rele(sp);
8003                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8004                 goto out;
8005         }
8006 
8007         /* hold off other access to open_owner while we tinker */
8008         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8009 
8010         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8011         case NFS4_CHECK_STATEID_OKAY:
8012                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8013                     resop) != NFS4_CHKSEQ_OKAY) {
8014                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8015                         goto end;
8016                 }
8017                 break;
8018         case NFS4_CHECK_STATEID_OLD:
8019                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8020                 goto end;
8021         case NFS4_CHECK_STATEID_BAD:
8022                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8023                 goto end;
8024         case NFS4_CHECK_STATEID_EXPIRED:
8025                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8026                 goto end;
8027         case NFS4_CHECK_STATEID_CLOSED:
8028                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8029                 goto end;
8030         case NFS4_CHECK_STATEID_UNCONFIRMED:
8031                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8032                 goto end;
8033         case NFS4_CHECK_STATEID_REPLAY:
8034                 /* Check the sequence id for the open owner */
8035                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8036                     resop)) {
8037                 case NFS4_CHKSEQ_OKAY:
8038                         /*
8039                          * This is replayed stateid; if seqid matches
8040                          * next expected, then client is using wrong seqid.
8041                          */
8042                         /* fall through */
8043                 case NFS4_CHKSEQ_BAD:
8044                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8045                         goto end;
8046                 case NFS4_CHKSEQ_REPLAY:
8047                         /*
8048                          * Note this case is the duplicate case so
8049                          * resp->status is already set.
8050                          */
8051                         *cs->statusp = resp->status;
8052                         rfs4_update_lease(sp->rs_owner->ro_client);
8053                         goto end;
8054                 }
8055                 break;
8056         default:
8057                 ASSERT(FALSE);
8058                 break;
8059         }
8060 
8061         rfs4_dbe_lock(sp->rs_dbe);
8062         /*
8063          * Check that the new access modes and deny modes are valid.
8064          * Check that no invalid bits are set.
8065          */
8066         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
8067             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
8068                 *cs->statusp = resp->status = NFS4ERR_INVAL;
8069                 rfs4_update_open_sequence(sp->rs_owner);
8070                 rfs4_dbe_unlock(sp->rs_dbe);
8071                 goto end;
8072         }
8073 
8074         /*
8075          * The new modes must be a subset of the current modes and
8076          * the access must specify at least one mode. To test that
8077          * the new mode is a subset of the current modes we bitwise
8078          * AND them together and check that the result equals the new
8079          * mode. For example:
8080          * New mode, access == R and current mode, sp->rs_open_access  == RW
8081          * access & sp->rs_open_access == R == access, so the new access mode
8082          * is valid. Consider access == RW, sp->rs_open_access = R
8083          * access & sp->rs_open_access == R != access, so the new access mode
8084          * is invalid.
8085          */
8086         if ((access & sp->rs_open_access) != access ||
8087             (deny & sp->rs_open_deny) != deny ||
8088             (access &
8089             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
8090                 *cs->statusp = resp->status = NFS4ERR_INVAL;
8091                 rfs4_update_open_sequence(sp->rs_owner);
8092                 rfs4_dbe_unlock(sp->rs_dbe);
8093                 goto end;
8094         }
8095 
8096         /*
8097          * Release any share locks associated with this stateID.
8098          * Strictly speaking, this violates the spec because the
8099          * spec effectively requires that open downgrade be atomic.
8100          * At present, fs_shrlock does not have this capability.
8101          */
8102         (void) rfs4_unshare(sp);
8103 
8104         status = rfs4_share(sp, access, deny);
8105         if (status != NFS4_OK) {
8106                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8107                 rfs4_update_open_sequence(sp->rs_owner);
8108                 rfs4_dbe_unlock(sp->rs_dbe);
8109                 goto end;
8110         }
8111 
8112         fp = sp->rs_finfo;
8113         rfs4_dbe_lock(fp->rf_dbe);
8114 
8115         /*
8116          * If the current mode has deny read and the new mode
8117          * does not, decrement the number of deny read mode bits
8118          * and if it goes to zero turn off the deny read bit
8119          * on the file.
8120          */
8121         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8122             (deny & OPEN4_SHARE_DENY_READ) == 0) {
8123                 fp->rf_deny_read--;
8124                 if (fp->rf_deny_read == 0)
8125                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8126         }
8127 
8128         /*
8129          * If the current mode has deny write and the new mode
8130          * does not, decrement the number of deny write mode bits
8131          * and if it goes to zero turn off the deny write bit
8132          * on the file.
8133          */
8134         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8135             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8136                 fp->rf_deny_write--;
8137                 if (fp->rf_deny_write == 0)
8138                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8139         }
8140 
8141         /*
8142          * If the current mode has access read and the new mode
8143          * does not, decrement the number of access read mode bits
8144          * and if it goes to zero turn off the access read bit
8145          * on the file.  set fflags to FREAD for the call to
8146          * vn_open_downgrade().
8147          */
8148         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8149             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8150                 fp->rf_access_read--;
8151                 if (fp->rf_access_read == 0)
8152                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8153                 fflags |= FREAD;
8154         }
8155 
8156         /*
8157          * If the current mode has access write and the new mode
8158          * does not, decrement the number of access write mode bits
8159          * and if it goes to zero turn off the access write bit
8160          * on the file.  set fflags to FWRITE for the call to
8161          * vn_open_downgrade().
8162          */
8163         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8164             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8165                 fp->rf_access_write--;
8166                 if (fp->rf_access_write == 0)
8167                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8168                 fflags |= FWRITE;
8169         }
8170 
8171         /* Check that the file is still accessible */
8172         ASSERT(fp->rf_share_access);
8173 
8174         rfs4_dbe_unlock(fp->rf_dbe);
8175 
8176         /* now set the new open access and deny modes */
8177         sp->rs_open_access = access;
8178         sp->rs_open_deny = deny;
8179 
8180         /*
8181          * we successfully downgraded the share lock, now we need to downgrade
8182          * the open. it is possible that the downgrade was only for a deny
8183          * mode and we have nothing else to do.
8184          */
8185         if ((fflags & (FREAD|FWRITE)) != 0)
8186                 vn_open_downgrade(cs->vp, fflags);
8187 
8188         /* Update the stateid */
8189         next_stateid(&sp->rs_stateid);
8190         resp->open_stateid = sp->rs_stateid.stateid;
8191 
8192         rfs4_dbe_unlock(sp->rs_dbe);
8193 
8194         *cs->statusp = resp->status = NFS4_OK;
8195         /* Update the lease */
8196         rfs4_update_lease(sp->rs_owner->ro_client);
8197         /* And the sequence */
8198         rfs4_update_open_sequence(sp->rs_owner);
8199         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8200 
8201 end:
8202         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8203         rfs4_state_rele(sp);
8204 out:
8205         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8206             OPEN_DOWNGRADE4res *, resp);
8207 }
8208 
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * buffer s1.  The buffer itself need not be NUL-terminated.
 *
 * Returns a pointer to the first byte of the earliest match, or NULL when
 * s2 does not occur within those n bytes.  An empty s2 matches at s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        size_t patlen = strlen(s2);
        char *cursor = (char *)s1;
        size_t remaining;

        /* Stop as soon as fewer bytes remain than the pattern needs. */
        for (remaining = n; remaining >= patlen; remaining--, cursor++) {
                if (bcmp(cursor, s2, patlen) == 0)
                        return (cursor);
        }

        return (NULL);
}
8224 
8225 /*
8226  * The logic behind this function is detailed in the NFSv4 RFC in the
8227  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
8228  * that section for explicit guidance to server behavior for
8229  * SETCLIENTID.
8230  */
8231 void
8232 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8233     struct svc_req *req, struct compound_state *cs)
8234 {
8235         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8236         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8237         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8238         rfs4_clntip_t *ci;
8239         bool_t create;
8240         char *addr, *netid;
8241         int len;
8242 
8243         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8244             SETCLIENTID4args *, args);
8245 retry:
8246         newcp = cp_confirmed = cp_unconfirmed = NULL;
8247 
8248         /*
8249          * Save the caller's IP address
8250          */
8251         args->client.cl_addr =
8252             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8253 
8254         /*
8255          * Record if it is a Solaris client that cannot handle referrals.
8256          */
8257         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8258             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8259                 /* Add a "yes, it's downrev" record */
8260                 create = TRUE;
8261                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8262                 ASSERT(ci != NULL);
8263                 rfs4_dbe_rele(ci->ri_dbe);
8264         } else {
8265                 /* Remove any previous record */
8266                 rfs4_invalidate_clntip(args->client.cl_addr);
8267         }
8268 
8269         /*
8270          * In search of an EXISTING client matching the incoming
8271          * request to establish a new client identifier at the server
8272          */
8273         create = TRUE;
8274         cp = rfs4_findclient(&args->client, &create, NULL);
8275 
8276         /* Should never happen */
8277         ASSERT(cp != NULL);
8278 
8279         if (cp == NULL) {
8280                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8281                 goto out;
8282         }
8283 
8284         /*
8285          * Easiest case. Client identifier is newly created and is
8286          * unconfirmed.  Also note that for this case, no other
8287          * entries exist for the client identifier.  Nothing else to
8288          * check.  Just setup the response and respond.
8289          */
8290         if (create) {
8291                 *cs->statusp = res->status = NFS4_OK;
8292                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8293                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8294                     cp->rc_confirm_verf;
8295                 /* Setup callback information; CB_NULL confirmation later */
8296                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8297 
8298                 rfs4_client_rele(cp);
8299                 goto out;
8300         }
8301 
8302         /*
8303          * An existing, confirmed client may exist but it may not have
8304          * been active for at least one lease period.  If so, then
8305          * "close" the client and create a new client identifier
8306          */
8307         if (rfs4_lease_expired(cp)) {
8308                 rfs4_client_close(cp);
8309                 goto retry;
8310         }
8311 
8312         if (cp->rc_need_confirm == TRUE)
8313                 cp_unconfirmed = cp;
8314         else
8315                 cp_confirmed = cp;
8316 
8317         cp = NULL;
8318 
8319         /*
8320          * We have a confirmed client, now check for an
8321          * unconfimred entry
8322          */
8323         if (cp_confirmed) {
8324                 /* If creds don't match then client identifier is inuse */
8325                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8326                         rfs4_cbinfo_t *cbp;
8327                         /*
8328                          * Some one else has established this client
8329                          * id. Try and say * who they are. We will use
8330                          * the call back address supplied by * the
8331                          * first client.
8332                          */
8333                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8334 
8335                         addr = netid = NULL;
8336 
8337                         cbp = &cp_confirmed->rc_cbinfo;
8338                         if (cbp->cb_callback.cb_location.r_addr &&
8339                             cbp->cb_callback.cb_location.r_netid) {
8340                                 cb_client4 *cbcp = &cbp->cb_callback;
8341 
8342                                 len = strlen(cbcp->cb_location.r_addr)+1;
8343                                 addr = kmem_alloc(len, KM_SLEEP);
8344                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8345                                 len = strlen(cbcp->cb_location.r_netid)+1;
8346                                 netid = kmem_alloc(len, KM_SLEEP);
8347                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8348                         }
8349 
8350                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8351                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8352 
8353                         rfs4_client_rele(cp_confirmed);
8354                 }
8355 
8356                 /*
8357                  * Confirmed, creds match, and verifier matches; must
8358                  * be an update of the callback info
8359                  */
8360                 if (cp_confirmed->rc_nfs_client.verifier ==
8361                     args->client.verifier) {
8362                         /* Setup callback information */
8363                         rfs4_client_setcb(cp_confirmed, &args->callback,
8364                             args->callback_ident);
8365 
8366                         /* everything okay -- move ahead */
8367                         *cs->statusp = res->status = NFS4_OK;
8368                         res->SETCLIENTID4res_u.resok4.clientid =
8369                             cp_confirmed->rc_clientid;
8370 
8371                         /* update the confirm_verifier and return it */
8372                         rfs4_client_scv_next(cp_confirmed);
8373                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8374                             cp_confirmed->rc_confirm_verf;
8375 
8376                         rfs4_client_rele(cp_confirmed);
8377                         goto out;
8378                 }
8379 
8380                 /*
8381                  * Creds match but the verifier doesn't.  Must search
8382                  * for an unconfirmed client that would be replaced by
8383                  * this request.
8384                  */
8385                 create = FALSE;
8386                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8387                     cp_confirmed);
8388         }
8389 
8390         /*
8391          * At this point, we have taken care of the brand new client
8392          * struct, INUSE case, update of an existing, and confirmed
8393          * client struct.
8394          */
8395 
8396         /*
8397          * check to see if things have changed while we originally
8398          * picked up the client struct.  If they have, then return and
8399          * retry the processing of this SETCLIENTID request.
8400          */
8401         if (cp_unconfirmed) {
8402                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8403                 if (!cp_unconfirmed->rc_need_confirm) {
8404                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8405                         rfs4_client_rele(cp_unconfirmed);
8406                         if (cp_confirmed)
8407                                 rfs4_client_rele(cp_confirmed);
8408                         goto retry;
8409                 }
8410                 /* do away with the old unconfirmed one */
8411                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8412                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8413                 rfs4_client_rele(cp_unconfirmed);
8414                 cp_unconfirmed = NULL;
8415         }
8416 
8417         /*
8418          * This search will temporarily hide the confirmed client
8419          * struct while a new client struct is created as the
8420          * unconfirmed one.
8421          */
8422         create = TRUE;
8423         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8424 
8425         ASSERT(newcp != NULL);
8426 
8427         if (newcp == NULL) {
8428                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8429                 rfs4_client_rele(cp_confirmed);
8430                 goto out;
8431         }
8432 
8433         /*
8434          * If one was not created, then a similar request must be in
8435          * process so release and start over with this one
8436          */
8437         if (create != TRUE) {
8438                 rfs4_client_rele(newcp);
8439                 if (cp_confirmed)
8440                         rfs4_client_rele(cp_confirmed);
8441                 goto retry;
8442         }
8443 
8444         *cs->statusp = res->status = NFS4_OK;
8445         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8446         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8447             newcp->rc_confirm_verf;
8448         /* Setup callback information; CB_NULL confirmation later */
8449         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8450 
8451         newcp->rc_cp_confirmed = cp_confirmed;
8452 
8453         rfs4_client_rele(newcp);
8454 
8455 out:
8456         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8457             SETCLIENTID4res *, res);
8458 }
8459 
8460 /*ARGSUSED*/
8461 void
8462 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8463     struct svc_req *req, struct compound_state *cs)
8464 {
8465         SETCLIENTID_CONFIRM4args *args =
8466             &argop->nfs_argop4_u.opsetclientid_confirm;
8467         SETCLIENTID_CONFIRM4res *res =
8468             &resop->nfs_resop4_u.opsetclientid_confirm;
8469         rfs4_client_t *cp, *cptoclose = NULL;
8470         nfs4_srv_t *nsrv4;
8471 
8472         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8473             struct compound_state *, cs,
8474             SETCLIENTID_CONFIRM4args *, args);
8475 
8476         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8477         *cs->statusp = res->status = NFS4_OK;
8478 
8479         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8480 
8481         if (cp == NULL) {
8482                 *cs->statusp = res->status =
8483                     rfs4_check_clientid(&args->clientid, 1);
8484                 goto out;
8485         }
8486 
8487         if (!creds_ok(cp, req, cs)) {
8488                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8489                 rfs4_client_rele(cp);
8490                 goto out;
8491         }
8492 
8493         /* If the verifier doesn't match, the record doesn't match */
8494         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8495                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8496                 rfs4_client_rele(cp);
8497                 goto out;
8498         }
8499 
8500         rfs4_dbe_lock(cp->rc_dbe);
8501         cp->rc_need_confirm = FALSE;
8502         if (cp->rc_cp_confirmed) {
8503                 cptoclose = cp->rc_cp_confirmed;
8504                 cptoclose->rc_ss_remove = 1;
8505                 cp->rc_cp_confirmed = NULL;
8506         }
8507 
8508         /*
8509          * Update the client's associated server instance, if it's changed
8510          * since the client was created.
8511          */
8512         if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8513                 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8514 
8515         /*
8516          * Record clientid in stable storage.
8517          * Must be done after server instance has been assigned.
8518          */
8519         rfs4_ss_clid(nsrv4, cp);
8520 
8521         rfs4_dbe_unlock(cp->rc_dbe);
8522 
8523         if (cptoclose)
8524                 /* don't need to rele, client_close does it */
8525                 rfs4_client_close(cptoclose);
8526 
8527         /* If needed, initiate CB_NULL call for callback path */
8528         rfs4_deleg_cb_check(cp);
8529         rfs4_update_lease(cp);
8530 
8531         /*
8532          * Check to see if client can perform reclaims
8533          */
8534         rfs4_ss_chkclid(nsrv4, cp);
8535 
8536         rfs4_client_rele(cp);
8537 
8538 out:
8539         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8540             struct compound_state *, cs,
8541             SETCLIENTID_CONFIRM4 *, res);
8542 }
8543 
8544 
8545 /*ARGSUSED*/
8546 void
8547 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8548     struct svc_req *req, struct compound_state *cs)
8549 {
8550         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8551         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8552         rfs4_state_t *sp;
8553         nfsstat4 status;
8554 
8555         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8556             CLOSE4args *, args);
8557 
8558         if (cs->vp == NULL) {
8559                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8560                 goto out;
8561         }
8562 
8563         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8564         if (status != NFS4_OK) {
8565                 *cs->statusp = resp->status = status;
8566                 goto out;
8567         }
8568 
8569         /* Ensure specified filehandle matches */
8570         if (cs->vp != sp->rs_finfo->rf_vp) {
8571                 rfs4_state_rele(sp);
8572                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8573                 goto out;
8574         }
8575 
8576         /* hold off other access to open_owner while we tinker */
8577         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8578 
8579         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8580         case NFS4_CHECK_STATEID_OKAY:
8581                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8582                     resop) != NFS4_CHKSEQ_OKAY) {
8583                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8584                         goto end;
8585                 }
8586                 break;
8587         case NFS4_CHECK_STATEID_OLD:
8588                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8589                 goto end;
8590         case NFS4_CHECK_STATEID_BAD:
8591                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8592                 goto end;
8593         case NFS4_CHECK_STATEID_EXPIRED:
8594                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8595                 goto end;
8596         case NFS4_CHECK_STATEID_CLOSED:
8597                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8598                 goto end;
8599         case NFS4_CHECK_STATEID_UNCONFIRMED:
8600                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8601                 goto end;
8602         case NFS4_CHECK_STATEID_REPLAY:
8603                 /* Check the sequence id for the open owner */
8604                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8605                     resop)) {
8606                 case NFS4_CHKSEQ_OKAY:
8607                         /*
8608                          * This is replayed stateid; if seqid matches
8609                          * next expected, then client is using wrong seqid.
8610                          */
8611                         /* FALL THROUGH */
8612                 case NFS4_CHKSEQ_BAD:
8613                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8614                         goto end;
8615                 case NFS4_CHKSEQ_REPLAY:
8616                         /*
8617                          * Note this case is the duplicate case so
8618                          * resp->status is already set.
8619                          */
8620                         *cs->statusp = resp->status;
8621                         rfs4_update_lease(sp->rs_owner->ro_client);
8622                         goto end;
8623                 }
8624                 break;
8625         default:
8626                 ASSERT(FALSE);
8627                 break;
8628         }
8629 
8630         rfs4_dbe_lock(sp->rs_dbe);
8631 
8632         /* Update the stateid. */
8633         next_stateid(&sp->rs_stateid);
8634         resp->open_stateid = sp->rs_stateid.stateid;
8635 
8636         rfs4_dbe_unlock(sp->rs_dbe);
8637 
8638         rfs4_update_lease(sp->rs_owner->ro_client);
8639         rfs4_update_open_sequence(sp->rs_owner);
8640         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8641 
8642         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8643 
8644         *cs->statusp = resp->status = status;
8645 
8646 end:
8647         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8648         rfs4_state_rele(sp);
8649 out:
8650         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8651             CLOSE4res *, resp);
8652 }
8653 
8654 /*
8655  * Manage the counts on the file struct and close all file locks
8656  */
8657 /*ARGSUSED*/
8658 void
8659 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8660     bool_t close_of_client)
8661 {
8662         rfs4_file_t *fp = sp->rs_finfo;
8663         rfs4_lo_state_t *lsp;
8664         int fflags = 0;
8665 
8666         /*
8667          * If this call is part of the larger closing down of client
8668          * state then it is just easier to release all locks
8669          * associated with this client instead of going through each
8670          * individual file and cleaning locks there.
8671          */
8672         if (close_of_client) {
8673                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8674                     !list_is_empty(&sp->rs_lostatelist) &&
8675                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8676                         /* Is the PxFS kernel module loaded? */
8677                         if (lm_remove_file_locks != NULL) {
8678                                 int new_sysid;
8679 
8680                                 /* Encode the cluster nodeid in new sysid */
8681                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8682                                 lm_set_nlmid_flk(&new_sysid);
8683 
8684                                 /*
8685                                  * This PxFS routine removes file locks for a
8686                                  * client over all nodes of a cluster.
8687                                  */
8688                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8689                                     "lm_remove_file_locks(sysid=0x%x)\n",
8690                                     new_sysid));
8691                                 (*lm_remove_file_locks)(new_sysid);
8692                         } else {
8693                                 struct flock64 flk;
8694 
8695                                 /* Release all locks for this client */
8696                                 flk.l_type = F_UNLKSYS;
8697                                 flk.l_whence = 0;
8698                                 flk.l_start = 0;
8699                                 flk.l_len = 0;
8700                                 flk.l_sysid =
8701                                     sp->rs_owner->ro_client->rc_sysidt;
8702                                 flk.l_pid = 0;
8703                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8704                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8705                                     (u_offset_t)0, NULL, CRED(), NULL);
8706                         }
8707 
8708                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8709                 }
8710         }
8711 
8712         /*
8713          * Release all locks on this file by this lock owner or at
8714          * least mark the locks as having been released
8715          */
8716         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8717             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8718                 lsp->rls_locks_cleaned = TRUE;
8719 
8720                 /* Was this already taken care of above? */
8721                 if (!close_of_client &&
8722                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8723                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8724                             lsp->rls_locker->rl_pid,
8725                             lsp->rls_locker->rl_client->rc_sysidt);
8726         }
8727 
8728         /*
8729          * Release any shrlocks associated with this open state ID.
8730          * This must be done before the rfs4_state gets marked closed.
8731          */
8732         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8733                 (void) rfs4_unshare(sp);
8734 
8735         if (sp->rs_open_access) {
8736                 rfs4_dbe_lock(fp->rf_dbe);
8737 
8738                 /*
8739                  * Decrement the count for each access and deny bit that this
8740                  * state has contributed to the file.
8741                  * If the file counts go to zero
8742                  * clear the appropriate bit in the appropriate mask.
8743                  */
8744                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8745                         fp->rf_access_read--;
8746                         fflags |= FREAD;
8747                         if (fp->rf_access_read == 0)
8748                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8749                 }
8750                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8751                         fp->rf_access_write--;
8752                         fflags |= FWRITE;
8753                         if (fp->rf_access_write == 0)
8754                                 fp->rf_share_access &=
8755                                     ~OPEN4_SHARE_ACCESS_WRITE;
8756                 }
8757                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8758                         fp->rf_deny_read--;
8759                         if (fp->rf_deny_read == 0)
8760                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8761                 }
8762                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8763                         fp->rf_deny_write--;
8764                         if (fp->rf_deny_write == 0)
8765                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8766                 }
8767 
8768                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8769 
8770                 rfs4_dbe_unlock(fp->rf_dbe);
8771 
8772                 sp->rs_open_access = 0;
8773                 sp->rs_open_deny = 0;
8774         }
8775 }
8776 
8777 /*
8778  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8779  */
8780 static nfsstat4
8781 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8782 {
8783         rfs4_lockowner_t *lo;
8784         rfs4_client_t *cp;
8785         uint32_t len;
8786 
8787         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8788         if (lo != NULL) {
8789                 cp = lo->rl_client;
8790                 if (rfs4_lease_expired(cp)) {
8791                         rfs4_lockowner_rele(lo);
8792                         rfs4_dbe_hold(cp->rc_dbe);
8793                         rfs4_client_close(cp);
8794                         return (NFS4ERR_EXPIRED);
8795                 }
8796                 dp->owner.clientid = lo->rl_owner.clientid;
8797                 len = lo->rl_owner.owner_len;
8798                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8799                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8800                 dp->owner.owner_len = len;
8801                 rfs4_lockowner_rele(lo);
8802                 goto finish;
8803         }
8804 
8805         /*
8806          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8807          * of the client id contain the boot time for a NFS4 lock. So we
8808          * fabricate and identity by setting clientid to the sysid, and
8809          * the lock owner to the pid.
8810          */
8811         dp->owner.clientid = flk->l_sysid;
8812         len = sizeof (pid_t);
8813         dp->owner.owner_len = len;
8814         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8815         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8816 finish:
8817         dp->offset = flk->l_start;
8818         dp->length = flk->l_len;
8819 
8820         if (flk->l_type == F_RDLCK)
8821                 dp->locktype = READ_LT;
8822         else if (flk->l_type == F_WRLCK)
8823                 dp->locktype = WRITE_LT;
8824         else
8825                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8826 
8827         return (NFS4_OK);
8828 }
8829 
8830 /*
 * The NFSv4.0 LOCK operation does not support blocking locks (at the
 * NFSv4.0 protocol level), so the client needs to resend the LOCK request in
 * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8834  * for that (obviously); they are sending the LOCK requests with some delays
8835  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8836  * locking and delay implementation at the client side.
8837  *
8838  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8839  * fast retries on its own (the for loop below) in a hope the lock will be
8840  * available soon.  And if not, the client won't need to resend the LOCK
8841  * requests so fast to check the lock availability.  This basically saves some
8842  * network traffic and tries to make sure the client gets the lock ASAP.
8843  */
8844 static int
8845 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8846 {
8847         int error;
8848         struct flock64 flk;
8849         int i;
8850         clock_t delaytime;
8851         int cmd;
8852         int spin_cnt = 0;
8853 
8854         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8855 retry:
8856         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8857 
8858         for (i = 0; i < rfs4_maxlock_tries; i++) {
8859                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8860                 error = VOP_FRLOCK(vp, cmd,
8861                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8862 
8863                 if (error != EAGAIN && error != EACCES)
8864                         break;
8865 
8866                 if (i < rfs4_maxlock_tries - 1) {
8867                         delay(delaytime);
8868                         delaytime *= 2;
8869                 }
8870         }
8871 
8872         if (error == EAGAIN || error == EACCES) {
8873                 /* Get the owner of the lock */
8874                 flk = *flock;
8875                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8876                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8877                     NULL) == 0) {
8878                         /*
8879                          * There's a race inherent in the current VOP_FRLOCK
8880                          * design where:
8881                          * a: "other guy" takes a lock that conflicts with a
8882                          * lock we want
8883                          * b: we attempt to take our lock (non-blocking) and
8884                          * the attempt fails.
8885                          * c: "other guy" releases the conflicting lock
8886                          * d: we ask what lock conflicts with the lock we want,
8887                          * getting F_UNLCK (no lock blocks us)
8888                          *
8889                          * If we retry the non-blocking lock attempt in this
8890                          * case (restart at step 'b') there's some possibility
8891                          * that many such attempts might fail.  However a test
8892                          * designed to actually provoke this race shows that
8893                          * the vast majority of cases require no retry, and
8894                          * only a few took as many as three retries.  Here's
8895                          * the test outcome:
8896                          *
8897                          *         number of retries    how many times we needed
8898                          *                              that many retries
8899                          *         0                    79461
8900                          *         1                      862
8901                          *         2                       49
8902                          *         3                        5
8903                          *
8904                          * Given those empirical results, we arbitrarily limit
8905                          * the retry count to ten.
8906                          *
8907                          * If we actually make to ten retries and give up,
8908                          * nothing catastrophic happens, but we're unable to
8909                          * return the information about the conflicting lock to
8910                          * the NFS client.  That's an acceptable trade off vs.
8911                          * letting this retry loop run forever.
8912                          */
8913                         if (flk.l_type == F_UNLCK) {
8914                                 if (spin_cnt++ < 10) {
8915                                         /* No longer locked, retry */
8916                                         goto retry;
8917                                 }
8918                         } else {
8919                                 *flock = flk;
8920                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8921                                     F_GETLK, &flk);
8922                         }
8923                 }
8924         }
8925 
8926         return (error);
8927 }
8928 
8929 /*ARGSUSED*/
8930 static nfsstat4
8931 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8932     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8933 {
8934         nfsstat4 status;
8935         rfs4_lockowner_t *lo = lsp->rls_locker;
8936         rfs4_state_t *sp = lsp->rls_state;
8937         struct flock64 flock;
8938         int16_t ltype;
8939         int flag;
8940         int error;
8941         sysid_t sysid;
8942         LOCK4res *lres;
8943         vnode_t *vp;
8944 
8945         if (rfs4_lease_expired(lo->rl_client)) {
8946                 return (NFS4ERR_EXPIRED);
8947         }
8948 
8949         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8950                 return (status);
8951 
8952         /* Check for zero length. To lock to end of file use all ones for V4 */
8953         if (length == 0)
8954                 return (NFS4ERR_INVAL);
8955         else if (length == (length4)(~0))
8956                 length = 0;             /* Posix to end of file  */
8957 
8958 retry:
8959         rfs4_dbe_lock(sp->rs_dbe);
8960         if (sp->rs_closed == TRUE) {
8961                 rfs4_dbe_unlock(sp->rs_dbe);
8962                 return (NFS4ERR_OLD_STATEID);
8963         }
8964 
8965         if (resop->resop != OP_LOCKU) {
8966                 switch (locktype) {
8967                 case READ_LT:
8968                 case READW_LT:
8969                         if ((sp->rs_share_access
8970                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8971                                 rfs4_dbe_unlock(sp->rs_dbe);
8972 
8973                                 return (NFS4ERR_OPENMODE);
8974                         }
8975                         ltype = F_RDLCK;
8976                         break;
8977                 case WRITE_LT:
8978                 case WRITEW_LT:
8979                         if ((sp->rs_share_access
8980                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8981                                 rfs4_dbe_unlock(sp->rs_dbe);
8982 
8983                                 return (NFS4ERR_OPENMODE);
8984                         }
8985                         ltype = F_WRLCK;
8986                         break;
8987                 }
8988         } else
8989                 ltype = F_UNLCK;
8990 
8991         flock.l_type = ltype;
8992         flock.l_whence = 0;             /* SEEK_SET */
8993         flock.l_start = offset;
8994         flock.l_len = length;
8995         flock.l_sysid = sysid;
8996         flock.l_pid = lsp->rls_locker->rl_pid;
8997 
8998         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8999         if (flock.l_len < 0 || flock.l_start < 0) {
9000                 rfs4_dbe_unlock(sp->rs_dbe);
9001                 return (NFS4ERR_INVAL);
9002         }
9003 
9004         /*
9005          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
9006          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
9007          */
9008         flag = (int)sp->rs_share_access | F_REMOTELOCK;
9009 
9010         vp = sp->rs_finfo->rf_vp;
9011         VN_HOLD(vp);
9012 
9013         /*
9014          * We need to unlock sp before we call the underlying filesystem to
9015          * acquire the file lock.
9016          */
9017         rfs4_dbe_unlock(sp->rs_dbe);
9018 
9019         error = setlock(vp, &flock, flag, cred);
9020 
9021         /*
9022          * Make sure the file is still open.  In a case the file was closed in
9023          * the meantime, clean the lock we acquired using the setlock() call
9024          * above, and return the appropriate error.
9025          */
9026         rfs4_dbe_lock(sp->rs_dbe);
9027         if (sp->rs_closed == TRUE) {
9028                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
9029                 rfs4_dbe_unlock(sp->rs_dbe);
9030 
9031                 VN_RELE(vp);
9032 
9033                 return (NFS4ERR_OLD_STATEID);
9034         }
9035         rfs4_dbe_unlock(sp->rs_dbe);
9036 
9037         VN_RELE(vp);
9038 
9039         if (error == 0) {
9040                 rfs4_dbe_lock(lsp->rls_dbe);
9041                 next_stateid(&lsp->rls_lockid);
9042                 rfs4_dbe_unlock(lsp->rls_dbe);
9043         }
9044 
9045         /*
9046          * N.B. We map error values to nfsv4 errors. This is differrent
9047          * than puterrno4 routine.
9048          */
9049         switch (error) {
9050         case 0:
9051                 status = NFS4_OK;
9052                 break;
9053         case EAGAIN:
9054         case EACCES:            /* Old value */
9055                 /* Can only get here if op is OP_LOCK */
9056                 ASSERT(resop->resop == OP_LOCK);
9057                 lres = &resop->nfs_resop4_u.oplock;
9058                 status = NFS4ERR_DENIED;
9059                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
9060                     == NFS4ERR_EXPIRED)
9061                         goto retry;
9062                 break;
9063         case ENOLCK:
9064                 status = NFS4ERR_DELAY;
9065                 break;
9066         case EOVERFLOW:
9067                 status = NFS4ERR_INVAL;
9068                 break;
9069         case EINVAL:
9070                 status = NFS4ERR_NOTSUPP;
9071                 break;
9072         default:
9073                 status = NFS4ERR_SERVERFAULT;
9074                 break;
9075         }
9076 
9077         return (status);
9078 }
9079 
9080 /*ARGSUSED*/
9081 void
9082 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
9083     struct svc_req *req, struct compound_state *cs)
9084 {
9085         LOCK4args *args = &argop->nfs_argop4_u.oplock;
9086         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
9087         nfsstat4 status;
9088         stateid4 *stateid;
9089         rfs4_lockowner_t *lo;
9090         rfs4_client_t *cp;
9091         rfs4_state_t *sp = NULL;
9092         rfs4_lo_state_t *lsp = NULL;
9093         bool_t ls_sw_held = FALSE;
9094         bool_t create = TRUE;
9095         bool_t lcreate = TRUE;
9096         bool_t dup_lock = FALSE;
9097         int rc;
9098 
9099         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
9100             LOCK4args *, args);
9101 
9102         if (cs->vp == NULL) {
9103                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9104                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9105                     cs, LOCK4res *, resp);
9106                 return;
9107         }
9108 
9109         if (args->locker.new_lock_owner) {
9110                 /* Create a new lockowner for this instance */
9111                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
9112 
9113                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
9114 
9115                 stateid = &olo->open_stateid;
9116                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
9117                 if (status != NFS4_OK) {
9118                         NFS4_DEBUG(rfs4_debug,
9119                             (CE_NOTE, "Get state failed in lock %d", status));
9120                         *cs->statusp = resp->status = status;
9121                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9122                             cs, LOCK4res *, resp);
9123                         return;
9124                 }
9125 
9126                 /* Ensure specified filehandle matches */
9127                 if (cs->vp != sp->rs_finfo->rf_vp) {
9128                         rfs4_state_rele(sp);
9129                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9130                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9131                             cs, LOCK4res *, resp);
9132                         return;
9133                 }
9134 
9135                 /* hold off other access to open_owner while we tinker */
9136                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
9137 
9138                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
9139                 case NFS4_CHECK_STATEID_OLD:
9140                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9141                         goto end;
9142                 case NFS4_CHECK_STATEID_BAD:
9143                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9144                         goto end;
9145                 case NFS4_CHECK_STATEID_EXPIRED:
9146                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9147                         goto end;
9148                 case NFS4_CHECK_STATEID_UNCONFIRMED:
9149                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9150                         goto end;
9151                 case NFS4_CHECK_STATEID_CLOSED:
9152                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9153                         goto end;
9154                 case NFS4_CHECK_STATEID_OKAY:
9155                 case NFS4_CHECK_STATEID_REPLAY:
9156                         switch (rfs4_check_olo_seqid(olo->open_seqid,
9157                             sp->rs_owner, resop)) {
9158                         case NFS4_CHKSEQ_OKAY:
9159                                 if (rc == NFS4_CHECK_STATEID_OKAY)
9160                                         break;
9161                                 /*
9162                                  * This is replayed stateid; if seqid
9163                                  * matches next expected, then client
9164                                  * is using wrong seqid.
9165                                  */
9166                                 /* FALLTHROUGH */
9167                         case NFS4_CHKSEQ_BAD:
9168                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9169                                 goto end;
9170                         case NFS4_CHKSEQ_REPLAY:
9171                                 /* This is a duplicate LOCK request */
9172                                 dup_lock = TRUE;
9173 
9174                                 /*
9175                                  * For a duplicate we do not want to
9176                                  * create a new lockowner as it should
9177                                  * already exist.
9178                                  * Turn off the lockowner create flag.
9179                                  */
9180                                 lcreate = FALSE;
9181                         }
9182                         break;
9183                 }
9184 
9185                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
9186                 if (lo == NULL) {
9187                         NFS4_DEBUG(rfs4_debug,
9188                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
9189                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
9190                         goto end;
9191                 }
9192 
9193                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
9194                 if (lsp == NULL) {
9195                         rfs4_update_lease(sp->rs_owner->ro_client);
9196                         /*
9197                          * Only update theh open_seqid if this is not
9198                          * a duplicate request
9199                          */
9200                         if (dup_lock == FALSE) {
9201                                 rfs4_update_open_sequence(sp->rs_owner);
9202                         }
9203 
9204                         NFS4_DEBUG(rfs4_debug,
9205                             (CE_NOTE, "rfs4_op_lock: no state"));
9206                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
9207                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9208                         rfs4_lockowner_rele(lo);
9209                         goto end;
9210                 }
9211 
9212                 /*
9213                  * This is the new_lock_owner branch and the client is
9214                  * supposed to be associating a new lock_owner with
9215                  * the open file at this point.  If we find that a
9216                  * lock_owner/state association already exists and a
9217                  * successful LOCK request was returned to the client,
9218                  * an error is returned to the client since this is
9219                  * not appropriate.  The client should be using the
9220                  * existing lock_owner branch.
9221                  */
9222                 if (dup_lock == FALSE && create == FALSE) {
9223                         if (lsp->rls_lock_completed == TRUE) {
9224                                 *cs->statusp =
9225                                     resp->status = NFS4ERR_BAD_SEQID;
9226                                 rfs4_lockowner_rele(lo);
9227                                 goto end;
9228                         }
9229                 }
9230 
9231                 rfs4_update_lease(sp->rs_owner->ro_client);
9232 
9233                 /*
9234                  * Only update theh open_seqid if this is not
9235                  * a duplicate request
9236                  */
9237                 if (dup_lock == FALSE) {
9238                         rfs4_update_open_sequence(sp->rs_owner);
9239                 }
9240 
9241                 /*
9242                  * If this is a duplicate lock request, just copy the
9243                  * previously saved reply and return.
9244                  */
9245                 if (dup_lock == TRUE) {
9246                         /* verify that lock_seqid's match */
9247                         if (lsp->rls_seqid != olo->lock_seqid) {
9248                                 NFS4_DEBUG(rfs4_debug,
9249                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9250                                     "lsp->seqid=%d old->seqid=%d",
9251                                     lsp->rls_seqid, olo->lock_seqid));
9252                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9253                         } else {
9254                                 rfs4_copy_reply(resop, &lsp->rls_reply);
9255                                 /*
9256                                  * Make sure to copy the just
9257                                  * retrieved reply status into the
9258                                  * overall compound status
9259                                  */
9260                                 *cs->statusp = resp->status;
9261                         }
9262                         rfs4_lockowner_rele(lo);
9263                         goto end;
9264                 }
9265 
9266                 rfs4_dbe_lock(lsp->rls_dbe);
9267 
9268                 /* Make sure to update the lock sequence id */
9269                 lsp->rls_seqid = olo->lock_seqid;
9270 
9271                 NFS4_DEBUG(rfs4_debug,
9272                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9273 
9274                 /*
9275                  * This is used to signify the newly created lockowner
9276                  * stateid and its sequence number.  The checks for
9277                  * sequence number and increment don't occur on the
9278                  * very first lock request for a lockowner.
9279                  */
9280                 lsp->rls_skip_seqid_check = TRUE;
9281 
9282                 /* hold off other access to lsp while we tinker */
9283                 rfs4_sw_enter(&lsp->rls_sw);
9284                 ls_sw_held = TRUE;
9285 
9286                 rfs4_dbe_unlock(lsp->rls_dbe);
9287 
9288                 rfs4_lockowner_rele(lo);
9289         } else {
9290                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9291                 /* get lsp and hold the lock on the underlying file struct */
9292                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9293                     != NFS4_OK) {
9294                         *cs->statusp = resp->status = status;
9295                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9296                             cs, LOCK4res *, resp);
9297                         return;
9298                 }
9299                 create = FALSE; /* We didn't create lsp */
9300 
9301                 /* Ensure specified filehandle matches */
9302                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9303                         rfs4_lo_state_rele(lsp, TRUE);
9304                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9305                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9306                             cs, LOCK4res *, resp);
9307                         return;
9308                 }
9309 
9310                 /* hold off other access to lsp while we tinker */
9311                 rfs4_sw_enter(&lsp->rls_sw);
9312                 ls_sw_held = TRUE;
9313 
9314                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9315                 /*
9316                  * The stateid looks like it was okay (expected to be
9317                  * the next one)
9318                  */
9319                 case NFS4_CHECK_STATEID_OKAY:
9320                         /*
9321                          * The sequence id is now checked.  Determine
9322                          * if this is a replay or if it is in the
9323                          * expected (next) sequence.  In the case of a
9324                          * replay, there are two replay conditions
9325                          * that may occur.  The first is the normal
9326                          * condition where a LOCK is done with a
9327                          * NFS4_OK response and the stateid is
9328                          * updated.  That case is handled below when
9329                          * the stateid is identified as a REPLAY.  The
9330                          * second is the case where an error is
9331                          * returned, like NFS4ERR_DENIED, and the
9332                          * sequence number is updated but the stateid
9333                          * is not updated.  This second case is dealt
9334                          * with here.  So it may seem odd that the
9335                          * stateid is okay but the sequence id is a
9336                          * replay but it is okay.
9337                          */
9338                         switch (rfs4_check_lock_seqid(
9339                             args->locker.locker4_u.lock_owner.lock_seqid,
9340                             lsp, resop)) {
9341                         case NFS4_CHKSEQ_REPLAY:
9342                                 if (resp->status != NFS4_OK) {
9343                                         /*
9344                                          * Here is our replay and need
9345                                          * to verify that the last
9346                                          * response was an error.
9347                                          */
9348                                         *cs->statusp = resp->status;
9349                                         goto end;
9350                                 }
9351                                 /*
9352                                  * This is done since the sequence id
9353                                  * looked like a replay but it didn't
9354                                  * pass our check so a BAD_SEQID is
9355                                  * returned as a result.
9356                                  */
9357                                 /*FALLTHROUGH*/
9358                         case NFS4_CHKSEQ_BAD:
9359                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9360                                 goto end;
9361                         case NFS4_CHKSEQ_OKAY:
9362                                 /* Everything looks okay move ahead */
9363                                 break;
9364                         }
9365                         break;
9366                 case NFS4_CHECK_STATEID_OLD:
9367                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9368                         goto end;
9369                 case NFS4_CHECK_STATEID_BAD:
9370                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9371                         goto end;
9372                 case NFS4_CHECK_STATEID_EXPIRED:
9373                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9374                         goto end;
9375                 case NFS4_CHECK_STATEID_CLOSED:
9376                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9377                         goto end;
9378                 case NFS4_CHECK_STATEID_REPLAY:
9379                         switch (rfs4_check_lock_seqid(
9380                             args->locker.locker4_u.lock_owner.lock_seqid,
9381                             lsp, resop)) {
9382                         case NFS4_CHKSEQ_OKAY:
9383                                 /*
9384                                  * This is a replayed stateid; if
9385                                  * seqid matches the next expected,
9386                                  * then client is using wrong seqid.
9387                                  */
9388                         case NFS4_CHKSEQ_BAD:
9389                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9390                                 goto end;
9391                         case NFS4_CHKSEQ_REPLAY:
9392                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9393                                 *cs->statusp = status = resp->status;
9394                                 goto end;
9395                         }
9396                         break;
9397                 default:
9398                         ASSERT(FALSE);
9399                         break;
9400                 }
9401 
9402                 rfs4_update_lock_sequence(lsp);
9403                 rfs4_update_lease(lsp->rls_locker->rl_client);
9404         }
9405 
9406         /*
9407          * NFS4 only allows locking on regular files, so
9408          * verify type of object.
9409          */
9410         if (cs->vp->v_type != VREG) {
9411                 if (cs->vp->v_type == VDIR)
9412                         status = NFS4ERR_ISDIR;
9413                 else
9414                         status = NFS4ERR_INVAL;
9415                 goto out;
9416         }
9417 
9418         cp = lsp->rls_state->rs_owner->ro_client;
9419 
9420         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9421                 status = NFS4ERR_GRACE;
9422                 goto out;
9423         }
9424 
9425         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9426                 status = NFS4ERR_NO_GRACE;
9427                 goto out;
9428         }
9429 
9430         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9431                 status = NFS4ERR_NO_GRACE;
9432                 goto out;
9433         }
9434 
9435         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9436                 cs->deleg = TRUE;
9437 
9438         status = rfs4_do_lock(lsp, args->locktype,
9439             args->offset, args->length, cs->cr, resop);
9440 
9441 out:
9442         lsp->rls_skip_seqid_check = FALSE;
9443 
9444         *cs->statusp = resp->status = status;
9445 
9446         if (status == NFS4_OK) {
9447                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9448                 lsp->rls_lock_completed = TRUE;
9449         }
9450         /*
9451          * Only update the "OPEN" response here if this was a new
9452          * lock_owner
9453          */
9454         if (sp)
9455                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9456 
9457         rfs4_update_lock_resp(lsp, resop);
9458 
9459 end:
9460         if (lsp) {
9461                 if (ls_sw_held)
9462                         rfs4_sw_exit(&lsp->rls_sw);
9463                 /*
9464                  * If an sp obtained, then the lsp does not represent
9465                  * a lock on the file struct.
9466                  */
9467                 if (sp != NULL)
9468                         rfs4_lo_state_rele(lsp, FALSE);
9469                 else
9470                         rfs4_lo_state_rele(lsp, TRUE);
9471         }
9472         if (sp) {
9473                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9474                 rfs4_state_rele(sp);
9475         }
9476 
9477         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9478             LOCK4res *, resp);
9479 }
9480 
9481 /* free function for LOCK/LOCKT */
9482 static void
9483 lock_denied_free(nfs_resop4 *resop)
9484 {
9485         LOCK4denied *dp = NULL;
9486 
9487         switch (resop->resop) {
9488         case OP_LOCK:
9489                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9490                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9491                 break;
9492         case OP_LOCKT:
9493                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9494                         dp = &resop->nfs_resop4_u.oplockt.denied;
9495                 break;
9496         default:
9497                 break;
9498         }
9499 
9500         if (dp)
9501                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9502 }
9503 
9504 /*ARGSUSED*/
9505 void
9506 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9507     struct svc_req *req, struct compound_state *cs)
9508 {
9509         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9510         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9511         nfsstat4 status;
9512         stateid4 *stateid = &args->lock_stateid;
9513         rfs4_lo_state_t *lsp;
9514 
9515         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9516             LOCKU4args *, args);
9517 
9518         if (cs->vp == NULL) {
9519                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9520                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9521                     LOCKU4res *, resp);
9522                 return;
9523         }
9524 
9525         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9526                 *cs->statusp = resp->status = status;
9527                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9528                     LOCKU4res *, resp);
9529                 return;
9530         }
9531 
9532         /* Ensure specified filehandle matches */
9533         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9534                 rfs4_lo_state_rele(lsp, TRUE);
9535                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9536                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9537                     LOCKU4res *, resp);
9538                 return;
9539         }
9540 
9541         /* hold off other access to lsp while we tinker */
9542         rfs4_sw_enter(&lsp->rls_sw);
9543 
9544         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9545         case NFS4_CHECK_STATEID_OKAY:
9546                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9547                     != NFS4_CHKSEQ_OKAY) {
9548                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9549                         goto end;
9550                 }
9551                 break;
9552         case NFS4_CHECK_STATEID_OLD:
9553                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9554                 goto end;
9555         case NFS4_CHECK_STATEID_BAD:
9556                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9557                 goto end;
9558         case NFS4_CHECK_STATEID_EXPIRED:
9559                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9560                 goto end;
9561         case NFS4_CHECK_STATEID_CLOSED:
9562                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9563                 goto end;
9564         case NFS4_CHECK_STATEID_REPLAY:
9565                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9566                 case NFS4_CHKSEQ_OKAY:
9567                                 /*
9568                                  * This is a replayed stateid; if
9569                                  * seqid matches the next expected,
9570                                  * then client is using wrong seqid.
9571                                  */
9572                 case NFS4_CHKSEQ_BAD:
9573                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9574                         goto end;
9575                 case NFS4_CHKSEQ_REPLAY:
9576                         rfs4_update_lease(lsp->rls_locker->rl_client);
9577                         *cs->statusp = status = resp->status;
9578                         goto end;
9579                 }
9580                 break;
9581         default:
9582                 ASSERT(FALSE);
9583                 break;
9584         }
9585 
9586         rfs4_update_lock_sequence(lsp);
9587         rfs4_update_lease(lsp->rls_locker->rl_client);
9588 
9589         /*
9590          * NFS4 only allows locking on regular files, so
9591          * verify type of object.
9592          */
9593         if (cs->vp->v_type != VREG) {
9594                 if (cs->vp->v_type == VDIR)
9595                         status = NFS4ERR_ISDIR;
9596                 else
9597                         status = NFS4ERR_INVAL;
9598                 goto out;
9599         }
9600 
9601         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9602                 status = NFS4ERR_GRACE;
9603                 goto out;
9604         }
9605 
9606         status = rfs4_do_lock(lsp, args->locktype,
9607             args->offset, args->length, cs->cr, resop);
9608 
9609 out:
9610         *cs->statusp = resp->status = status;
9611 
9612         if (status == NFS4_OK)
9613                 resp->lock_stateid = lsp->rls_lockid.stateid;
9614 
9615         rfs4_update_lock_resp(lsp, resop);
9616 
9617 end:
9618         rfs4_sw_exit(&lsp->rls_sw);
9619         rfs4_lo_state_rele(lsp, TRUE);
9620 
9621         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9622             LOCKU4res *, resp);
9623 }
9624 
9625 /*
9626  * LOCKT is a best effort routine, the client can not be guaranteed that
9627  * the status return is still in effect by the time the reply is received.
9628  * They are numerous race conditions in this routine, but we are not required
9629  * and can not be accurate.
9630  */
9631 /*ARGSUSED*/
9632 void
9633 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9634     struct svc_req *req, struct compound_state *cs)
9635 {
9636         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9637         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9638         rfs4_lockowner_t *lo;
9639         rfs4_client_t *cp;
9640         bool_t create = FALSE;
9641         struct flock64 flk;
9642         int error;
9643         int flag = FREAD | FWRITE;
9644         int ltype;
9645         length4 posix_length;
9646         sysid_t sysid;
9647         pid_t pid;
9648 
9649         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9650             LOCKT4args *, args);
9651 
9652         if (cs->vp == NULL) {
9653                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9654                 goto out;
9655         }
9656 
9657         /*
9658          * NFS4 only allows locking on regular files, so
9659          * verify type of object.
9660          */
9661         if (cs->vp->v_type != VREG) {
9662                 if (cs->vp->v_type == VDIR)
9663                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9664                 else
9665                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9666                 goto out;
9667         }
9668 
9669         /*
9670          * Check out the clientid to ensure the server knows about it
9671          * so that we correctly inform the client of a server reboot.
9672          */
9673         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9674             == NULL) {
9675                 *cs->statusp = resp->status =
9676                     rfs4_check_clientid(&args->owner.clientid, 0);
9677                 goto out;
9678         }
9679         if (rfs4_lease_expired(cp)) {
9680                 rfs4_client_close(cp);
9681                 /*
9682                  * Protocol doesn't allow returning NFS4ERR_STALE as
9683                  * other operations do on this check so STALE_CLIENTID
9684                  * is returned instead
9685                  */
9686                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9687                 goto out;
9688         }
9689 
9690         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9691                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9692                 rfs4_client_rele(cp);
9693                 goto out;
9694         }
9695         rfs4_client_rele(cp);
9696 
9697         resp->status = NFS4_OK;
9698 
9699         switch (args->locktype) {
9700         case READ_LT:
9701         case READW_LT:
9702                 ltype = F_RDLCK;
9703                 break;
9704         case WRITE_LT:
9705         case WRITEW_LT:
9706                 ltype = F_WRLCK;
9707                 break;
9708         }
9709 
9710         posix_length = args->length;
9711         /* Check for zero length. To lock to end of file use all ones for V4 */
9712         if (posix_length == 0) {
9713                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9714                 goto out;
9715         } else if (posix_length == (length4)(~0)) {
9716                 posix_length = 0;       /* Posix to end of file  */
9717         }
9718 
9719         /* Find or create a lockowner */
9720         lo = rfs4_findlockowner(&args->owner, &create);
9721 
9722         if (lo) {
9723                 pid = lo->rl_pid;
9724                 if ((resp->status =
9725                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9726                         goto err;
9727         } else {
9728                 pid = 0;
9729                 sysid = lockt_sysid;
9730         }
9731 retry:
9732         flk.l_type = ltype;
9733         flk.l_whence = 0;               /* SEEK_SET */
9734         flk.l_start = args->offset;
9735         flk.l_len = posix_length;
9736         flk.l_sysid = sysid;
9737         flk.l_pid = pid;
9738         flag |= F_REMOTELOCK;
9739 
9740         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9741 
9742         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9743         if (flk.l_len < 0 || flk.l_start < 0) {
9744                 resp->status = NFS4ERR_INVAL;
9745                 goto err;
9746         }
9747         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9748             NULL, cs->cr, NULL);
9749 
9750         /*
9751          * N.B. We map error values to nfsv4 errors. This is differrent
9752          * than puterrno4 routine.
9753          */
9754         switch (error) {
9755         case 0:
9756                 if (flk.l_type == F_UNLCK)
9757                         resp->status = NFS4_OK;
9758                 else {
9759                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9760                                 goto retry;
9761                         resp->status = NFS4ERR_DENIED;
9762                 }
9763                 break;
9764         case EOVERFLOW:
9765                 resp->status = NFS4ERR_INVAL;
9766                 break;
9767         case EINVAL:
9768                 resp->status = NFS4ERR_NOTSUPP;
9769                 break;
9770         default:
9771                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9772                     error);
9773                 resp->status = NFS4ERR_SERVERFAULT;
9774                 break;
9775         }
9776 
9777 err:
9778         if (lo)
9779                 rfs4_lockowner_rele(lo);
9780         *cs->statusp = resp->status;
9781 out:
9782         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9783             LOCKT4res *, resp);
9784 }
9785 
9786 int
9787 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9788 {
9789         int err;
9790         int cmd;
9791         vnode_t *vp;
9792         struct shrlock shr;
9793         struct shr_locowner shr_loco;
9794         int fflags = 0;
9795 
9796         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9797         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9798 
9799         if (sp->rs_closed)
9800                 return (NFS4ERR_OLD_STATEID);
9801 
9802         vp = sp->rs_finfo->rf_vp;
9803         ASSERT(vp);
9804 
9805         shr.s_access = shr.s_deny = 0;
9806 
9807         if (access & OPEN4_SHARE_ACCESS_READ) {
9808                 fflags |= FREAD;
9809                 shr.s_access |= F_RDACC;
9810         }
9811         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9812                 fflags |= FWRITE;
9813                 shr.s_access |= F_WRACC;
9814         }
9815         ASSERT(shr.s_access);
9816 
9817         if (deny & OPEN4_SHARE_DENY_READ)
9818                 shr.s_deny |= F_RDDNY;
9819         if (deny & OPEN4_SHARE_DENY_WRITE)
9820                 shr.s_deny |= F_WRDNY;
9821 
9822         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9823         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9824         shr_loco.sl_pid = shr.s_pid;
9825         shr_loco.sl_id = shr.s_sysid;
9826         shr.s_owner = (caddr_t)&shr_loco;
9827         shr.s_own_len = sizeof (shr_loco);
9828 
9829         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9830 
9831         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9832         if (err != 0) {
9833                 if (err == EAGAIN)
9834                         err = NFS4ERR_SHARE_DENIED;
9835                 else
9836                         err = puterrno4(err);
9837                 return (err);
9838         }
9839 
9840         sp->rs_share_access |= access;
9841         sp->rs_share_deny |= deny;
9842 
9843         return (0);
9844 }
9845 
9846 int
9847 rfs4_unshare(rfs4_state_t *sp)
9848 {
9849         int err;
9850         struct shrlock shr;
9851         struct shr_locowner shr_loco;
9852 
9853         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9854 
9855         if (sp->rs_closed || sp->rs_share_access == 0)
9856                 return (0);
9857 
9858         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9859         ASSERT(sp->rs_finfo->rf_vp);
9860 
9861         shr.s_access = shr.s_deny = 0;
9862         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9863         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9864         shr_loco.sl_pid = shr.s_pid;
9865         shr_loco.sl_id = shr.s_sysid;
9866         shr.s_owner = (caddr_t)&shr_loco;
9867         shr.s_own_len = sizeof (shr_loco);
9868 
9869         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9870             NULL);
9871         if (err != 0) {
9872                 err = puterrno4(err);
9873                 return (err);
9874         }
9875 
9876         sp->rs_share_access = 0;
9877         sp->rs_share_deny = 0;
9878 
9879         return (0);
9880 
9881 }
9882 
9883 static int
9884 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9885 {
9886         struct clist    *wcl;
9887         count4          count = rok->data_len;
9888         int             wlist_len;
9889 
9890         wcl = args->wlist;
9891         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9892                 return (FALSE);
9893         }
9894         wcl = args->wlist;
9895         rok->wlist_len = wlist_len;
9896         rok->wlist = wcl;
9897         return (TRUE);
9898 }
9899 
/*
 * Tunable to disable server referrals.  When non-zero,
 * vn_is_nfs_reparse() reports B_FALSE for every vnode.
 */
int rfs4_no_referrals = 0;
9902 
9903 /*
9904  * Find an NFS record in reparse point data.
9905  * Returns 0 for success and <0 or an errno value on failure.
9906  */
9907 int
9908 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9909 {
9910         int err;
9911         char *stype, *val;
9912         nvlist_t *nvl;
9913         nvpair_t *curr;
9914 
9915         if ((nvl = reparse_init()) == NULL)
9916                 return (-1);
9917 
9918         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9919                 reparse_free(nvl);
9920                 return (err);
9921         }
9922 
9923         curr = NULL;
9924         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9925                 if ((stype = nvpair_name(curr)) == NULL) {
9926                         reparse_free(nvl);
9927                         return (-2);
9928                 }
9929                 if (strncasecmp(stype, "NFS", 3) == 0)
9930                         break;
9931         }
9932 
9933         if ((curr == NULL) ||
9934             (nvpair_value_string(curr, &val))) {
9935                 reparse_free(nvl);
9936                 return (-3);
9937         }
9938         *nvlp = nvl;
9939         *svcp = stype;
9940         *datap = val;
9941         return (0);
9942 }
9943 
9944 int
9945 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9946 {
9947         nvlist_t *nvl;
9948         char *s, *d;
9949 
9950         if (rfs4_no_referrals != 0)
9951                 return (B_FALSE);
9952 
9953         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9954                 return (B_FALSE);
9955 
9956         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9957                 return (B_FALSE);
9958 
9959         reparse_free(nvl);
9960 
9961         return (B_TRUE);
9962 }
9963 
9964 /*
9965  * There is a user-level copy of this routine in ref_subr.c.
9966  * Changes should be kept in sync.
9967  */
9968 static int
9969 nfs4_create_components(char *path, component4 *comp4)
9970 {
9971         int slen, plen, ncomp;
9972         char *ori_path, *nxtc, buf[MAXNAMELEN];
9973 
9974         if (path == NULL)
9975                 return (0);
9976 
9977         plen = strlen(path) + 1;        /* include the terminator */
9978         ori_path = path;
9979         ncomp = 0;
9980 
9981         /* count number of components in the path */
9982         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9983                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9984                         if ((slen = nxtc - path) == 0) {
9985                                 path = nxtc + 1;
9986                                 continue;
9987                         }
9988 
9989                         if (comp4 != NULL) {
9990                                 bcopy(path, buf, slen);
9991                                 buf[slen] = '\0';
9992                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9993                         }
9994 
9995                         ncomp++;        /* 1 valid component */
9996                         path = nxtc + 1;
9997                 }
9998                 if (*nxtc == '\0' || *nxtc == '\n')
9999                         break;
10000         }
10001 
10002         return (ncomp);
10003 }
10004 
10005 /*
10006  * There is a user-level copy of this routine in ref_subr.c.
10007  * Changes should be kept in sync.
10008  */
10009 static int
10010 make_pathname4(char *path, pathname4 *pathname)
10011 {
10012         int ncomp;
10013         component4 *comp4;
10014 
10015         if (pathname == NULL)
10016                 return (0);
10017 
10018         if (path == NULL) {
10019                 pathname->pathname4_val = NULL;
10020                 pathname->pathname4_len = 0;
10021                 return (0);
10022         }
10023 
10024         /* count number of components to alloc buffer */
10025         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
10026                 pathname->pathname4_val = NULL;
10027                 pathname->pathname4_len = 0;
10028                 return (0);
10029         }
10030         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
10031 
10032         /* copy components into allocated buffer */
10033         ncomp = nfs4_create_components(path, comp4);
10034 
10035         pathname->pathname4_val = comp4;
10036         pathname->pathname4_len = ncomp;
10037 
10038         return (ncomp);
10039 }
10040 
10041 #define xdr_fs_locations4 xdr_fattr4_fs_locations
10042 
10043 fs_locations4 *
10044 fetch_referral(vnode_t *vp, cred_t *cr)
10045 {
10046         nvlist_t *nvl;
10047         char *stype, *sdata;
10048         fs_locations4 *result;
10049         char buf[1024];
10050         size_t bufsize;
10051         XDR xdr;
10052         int err;
10053 
10054         /*
10055          * Check attrs to ensure it's a reparse point
10056          */
10057         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10058                 return (NULL);
10059 
10060         /*
10061          * Look for an NFS record and get the type and data
10062          */
10063         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
10064                 return (NULL);
10065 
10066         /*
10067          * With the type and data, upcall to get the referral
10068          */
10069         bufsize = sizeof (buf);
10070         bzero(buf, sizeof (buf));
10071         err = reparse_kderef((const char *)stype, (const char *)sdata,
10072             buf, &bufsize);
10073         reparse_free(nvl);
10074 
10075         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
10076             char *, stype, char *, sdata, char *, buf, int, err);
10077         if (err) {
10078                 cmn_err(CE_NOTE,
10079                     "reparsed daemon not running: unable to get referral (%d)",
10080                     err);
10081                 return (NULL);
10082         }
10083 
10084         /*
10085          * We get an XDR'ed record back from the kderef call
10086          */
10087         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
10088         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
10089         err = xdr_fs_locations4(&xdr, result);
10090         XDR_DESTROY(&xdr);
10091         if (err != TRUE) {
10092                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
10093                     int, err);
10094                 return (NULL);
10095         }
10096 
10097         /*
10098          * Look at path to recover fs_root, ignoring the leading '/'
10099          */
10100         (void) make_pathname4(vp->v_path, &result->fs_root);
10101 
10102         return (result);
10103 }
10104 
10105 char *
10106 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10107 {
10108         fs_locations4 *fsl;
10109         fs_location4 *fs;
10110         char *server, *path, *symbuf;
10111         static char *prefix = "/net/";
10112         int i, size, npaths;
10113         uint_t len;
10114 
10115         /* Get the referral */
10116         if ((fsl = fetch_referral(vp, cr)) == NULL)
10117                 return (NULL);
10118 
10119         /* Deal with only the first location and first server */
10120         fs = &fsl->locations_val[0];
10121         server = utf8_to_str(&fs->server_val[0], &len, NULL);
10122         if (server == NULL) {
10123                 rfs4_free_fs_locations4(fsl);
10124                 kmem_free(fsl, sizeof (fs_locations4));
10125                 return (NULL);
10126         }
10127 
10128         /* Figure out size for "/net/" + host + /path/path/path + NULL */
10129         size = strlen(prefix) + len;
10130         for (i = 0; i < fs->rootpath.pathname4_len; i++)
10131                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10132 
10133         /* Allocate the symlink buffer and fill it */
10134         symbuf = kmem_zalloc(size, KM_SLEEP);
10135         (void) strcat(symbuf, prefix);
10136         (void) strcat(symbuf, server);
10137         kmem_free(server, len);
10138 
10139         npaths = 0;
10140         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10141                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10142                 if (path == NULL)
10143                         continue;
10144                 (void) strcat(symbuf, "/");
10145                 (void) strcat(symbuf, path);
10146                 npaths++;
10147                 kmem_free(path, len);
10148         }
10149 
10150         rfs4_free_fs_locations4(fsl);
10151         kmem_free(fsl, sizeof (fs_locations4));
10152 
10153         if (strsz != NULL)
10154                 *strsz = size;
10155         return (symbuf);
10156 }
10157 
10158 /*
10159  * Check to see if we have a downrev Solaris client, so that we
10160  * can send it a symlink instead of a referral.
10161  */
10162 int
10163 client_is_downrev(struct svc_req *req)
10164 {
10165         struct sockaddr *ca;
10166         rfs4_clntip_t *ci;
10167         bool_t create = FALSE;
10168         int is_downrev;
10169 
10170         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10171         ASSERT(ca);
10172         ci = rfs4_find_clntip(ca, &create);
10173         if (ci == NULL)
10174                 return (0);
10175         is_downrev = ci->ri_no_referrals;
10176         rfs4_dbe_rele(ci->ri_dbe);
10177         return (is_downrev);
10178 }
10179 
10180 /*
10181  * Do the main work of handling HA-NFSv4 Resource Group failover on
10182  * Sun Cluster.
10183  * We need to detect whether any RG admin paths have been added or removed,
10184  * and adjust resources accordingly.
10185  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
10186  * order to scale, the list and array of paths need to be held in more
10187  * suitable data structures.
10188  */
10189 static void
10190 hanfsv4_failover(nfs4_srv_t *nsrv4)
10191 {
10192         int i, start_grace, numadded_paths = 0;
10193         char **added_paths = NULL;
10194         rfs4_dss_path_t *dss_path;
10195 
10196         /*
10197          * Note: currently, dss_pathlist cannot be NULL, since
10198          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
10199          * make the latter dynamically specified too, the following will
10200          * need to be adjusted.
10201          */
10202 
10203         /*
10204          * First, look for removed paths: RGs that have been failed-over
10205          * away from this node.
10206          * Walk the "currently-serving" dss_pathlist and, for each
10207          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
10208          * from nfsd. If not, that RG path has been removed.
10209          *
10210          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
10211          * any duplicates.
10212          */
10213         dss_path = nsrv4->dss_pathlist;
10214         do {
10215                 int found = 0;
10216                 char *path = dss_path->path;
10217 
10218                 /* used only for non-HA so may not be removed */
10219                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10220                         dss_path = dss_path->next;
10221                         continue;
10222                 }
10223 
10224                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10225                         int cmpret;
10226                         char *newpath = rfs4_dss_newpaths[i];
10227 
10228                         /*
10229                          * Since nfsd has sorted rfs4_dss_newpaths for us,
10230                          * once the return from strcmp is negative we know
10231                          * we've passed the point where "path" should be,
10232                          * and can stop searching: "path" has been removed.
10233                          */
10234                         cmpret = strcmp(path, newpath);
10235                         if (cmpret < 0)
10236                                 break;
10237                         if (cmpret == 0) {
10238                                 found = 1;
10239                                 break;
10240                         }
10241                 }
10242 
10243                 if (found == 0) {
10244                         unsigned index = dss_path->index;
10245                         rfs4_servinst_t *sip = dss_path->sip;
10246                         rfs4_dss_path_t *path_next = dss_path->next;
10247 
10248                         /*
10249                          * This path has been removed.
10250                          * We must clear out the servinst reference to
10251                          * it, since it's now owned by another
10252                          * node: we should not attempt to touch it.
10253                          */
10254                         ASSERT(dss_path == sip->dss_paths[index]);
10255                         sip->dss_paths[index] = NULL;
10256 
10257                         /* remove from "currently-serving" list, and destroy */
10258                         remque(dss_path);
10259                         /* allow for NUL */
10260                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10261                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10262 
10263                         dss_path = path_next;
10264                 } else {
10265                         /* path was found; not removed */
10266                         dss_path = dss_path->next;
10267                 }
10268         } while (dss_path != nsrv4->dss_pathlist);
10269 
10270         /*
10271          * Now, look for added paths: RGs that have been failed-over
10272          * to this node.
10273          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10274          * for each path, check if it is on the "currently-serving"
10275          * dss_pathlist. If not, that RG path has been added.
10276          *
10277          * Note: we don't do duplicate detection here; nfsd does that for us.
10278          *
10279          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10280          * an upper bound for the size needed for added_paths[numadded_paths].
10281          */
10282 
10283         /* probably more space than we need, but guaranteed to be enough */
10284         if (rfs4_dss_numnewpaths > 0) {
10285                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10286                 added_paths = kmem_zalloc(sz, KM_SLEEP);
10287         }
10288 
10289         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10290         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10291                 int found = 0;
10292                 char *newpath = rfs4_dss_newpaths[i];
10293 
10294                 dss_path = nsrv4->dss_pathlist;
10295                 do {
10296                         char *path = dss_path->path;
10297 
10298                         /* used only for non-HA */
10299                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10300                                 dss_path = dss_path->next;
10301                                 continue;
10302                         }
10303 
10304                         if (strncmp(path, newpath, strlen(path)) == 0) {
10305                                 found = 1;
10306                                 break;
10307                         }
10308 
10309                         dss_path = dss_path->next;
10310                 } while (dss_path != nsrv4->dss_pathlist);
10311 
10312                 if (found == 0) {
10313                         added_paths[numadded_paths] = newpath;
10314                         numadded_paths++;
10315                 }
10316         }
10317 
10318         /* did we find any added paths? */
10319         if (numadded_paths > 0) {
10320 
10321                 /* create a new server instance, and start its grace period */
10322                 start_grace = 1;
10323                 /* CSTYLED */
10324                 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10325 
10326                 /* read in the stable storage state from these paths */
10327                 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10328 
10329                 /*
10330                  * Multiple failovers during a grace period will cause
10331                  * clients of the same resource group to be partitioned
10332                  * into different server instances, with different
10333                  * grace periods.  Since clients of the same resource
10334                  * group must be subject to the same grace period,
10335                  * we need to reset all currently active grace periods.
10336                  */
10337                 rfs4_grace_reset_all(nsrv4);
10338         }
10339 
10340         if (rfs4_dss_numnewpaths > 0)
10341                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10342 }