1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30 
  31 /*
  32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33  * Copyright 2019 Nexenta Systems, Inc.
  34  * Copyright 2019 Nexenta by DDN, Inc.
  35  */
  36 
  37 #include <sys/param.h>
  38 #include <sys/types.h>
  39 #include <sys/systm.h>
  40 #include <sys/cred.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 #include <sys/vnode.h>
  45 #include <sys/uio.h>
  46 #include <sys/errno.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/statvfs.h>
  49 #include <sys/kmem.h>
  50 #include <sys/dirent.h>
  51 #include <sys/cmn_err.h>
  52 #include <sys/debug.h>
  53 #include <sys/systeminfo.h>
  54 #include <sys/flock.h>
  55 #include <sys/pathname.h>
  56 #include <sys/nbmlock.h>
  57 #include <sys/share.h>
  58 #include <sys/atomic.h>
  59 #include <sys/policy.h>
  60 #include <sys/fem.h>
  61 #include <sys/sdt.h>
  62 #include <sys/ddi.h>
  63 #include <sys/zone.h>
  64 
  65 #include <fs/fs_reparse.h>
  66 
  67 #include <rpc/types.h>
  68 #include <rpc/auth.h>
  69 #include <rpc/rpcsec_gss.h>
  70 #include <rpc/svc.h>
  71 
  72 #include <nfs/nfs.h>
  73 #include <nfs/nfssys.h>
  74 #include <nfs/export.h>
  75 #include <nfs/nfs_cmd.h>
  76 #include <nfs/lm.h>
  77 #include <nfs/nfs4.h>
  78 #include <nfs/nfs4_drc.h>
  79 
  80 #include <sys/strsubr.h>
  81 #include <sys/strsun.h>
  82 
  83 #include <inet/common.h>
  84 #include <inet/ip.h>
  85 #include <inet/ip6.h>
  86 
  87 #include <sys/tsol/label.h>
  88 #include <sys/tsol/tndb.h>
  89 
     /*
      * Lock-acquisition tunables.  Each #define is the compiled-in default and
      * the static variable next to it holds the value that is actually
      * consulted (the consumers are outside this view).
      */
  90 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  92 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  93 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
     /* Defined in other files; only referenced from here. */
  94 extern struct svc_ops rdma_svc_ops;
  95 extern int nfs_loaned_buffers;
  96 /* End of Tunables */
  97 
     /* Forward declaration; the definition is later in the file. */
  98 static int rdma_setup_read_data4(READ4args *, READ4res *);
  99 
 100 /*
 101  * Used to bump the stateid4.seqid value and show changes in the stateid
      *
      * Expands to a pre-increment of sp->bits.chgseq, so the value of the
      * expression is the new (already incremented) sequence number.  The
      * argument is evaluated exactly once.
 102  */
 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
 104 
 105 /*
 106  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  *      maxcount that isn't large enough to hold the smallest possible
 109  *      XDR encoded dirent.
 110  *
 111  *          sizeof cookie (8 bytes) +
 112  *          sizeof name_len (4 bytes) +
 113  *          sizeof smallest (padded) name (4 bytes) +
 114  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  *          sizeof attrlist4_len (4 bytes) +
 116  *          sizeof next boolean (4 bytes)
 117  *
 118  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  * the smallest possible entry4 (assumes no attrs requested).
 120  *      sizeof nfsstat4 (4 bytes) +
 121  *      sizeof verifier4 (8 bytes) +
 122  *      sizeof entry4list bool (4 bytes) +
 123  *      sizeof entry4   (36 bytes) +
 124  *      sizeof eof bool  (4 bytes)
 125  *
 126  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  *      required for a given name length.  MAXNAMELEN is the maximum
 130  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  *      to hold ., .., and the largest possible solaris dirent64.
 134  */
     /* Sum of the six entry4 components itemized above: 8 + 4 + 4 + 12 + 4 + 4. */
 135 #define RFS4_MINLEN_ENTRY4 36
     /* nfsstat4 + verifier4 + entry4list bool + one minimal entry4 + eof bool. */
 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
     /* Room for ".", ".." and one dirent64 carrying a MAXNAMELEN-long name. */
 137 #define RFS4_MINLEN_RDDIR_BUF \
 138         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139 
 140 /*
 141  * It would be better to pad to 4 bytes since that's what XDR would do,
 142  * but the dirents UFS gives us are already padded to 8, so just take
 143  * what we're given.  Dircount is only a hint anyway.  Currently the
 144  * solaris kernel is ASCII only, so there's no point in calling the
 145  * UTF8 functions.
 146  *
 147  * dirent64: name padded to provide 8 byte struct alignment
 148  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  *
 150  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  *
 152  */
     /*
      * Dircount contribution of one dirent64: three XDR units (the cookie and
      * the name length, per the layout above) plus the name space that
      * DIRENT64_NAMELEN() derives from the entry's d_reclen.  dp is evaluated
      * once.
      */
 153 #define DIRENT64_TO_DIRCOUNT(dp) \
 154         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155 
     /*
      * Zone-specific-data key for the per-zone nfs4_srv_t.  Created in
      * rfs4_srvrinit() with rfs4_zone_init()/rfs4_zone_fini() as callbacks and
      * looked up with zone_getspecific().
      */
 156 zone_key_t      rfs4_zone_key;
 157 
     /* Allocated by lm_alloc_sysidt() in rfs4_srvrinit(), freed in rfs4_srvrfini(). */
 158 static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 159 
     /* Set from fs_new_caller_id() in rfs4_srvrinit(). */
 160 u_longlong_t    nfs4_srv_caller_id;
     /* Key filled in by vsd_create() in rfs4_srvrinit(). */
 161 uint_t          nfs4_srv_vkey = 0;
 162 
     /* Resets a compound_state to its initial values; defined further down. */
 163 void    rfs4_init_compound_state(struct compound_state *);
 164 
     /*
      * Forward declarations.
      *
      * Every rfs4_op_*() operation handler takes the same four arguments: the
      * operation's arguments (nfs_argop4), the result slot to fill in
      * (nfs_resop4), the RPC request and the state of the enclosing compound.
      * The *_free() routines (and lock_denied_free()) take only the nfs_resop4;
      * they and nullfree() are what rfsv4disptab installs as dis_resfree.
      *
      * NOTE(review): rfs4_op_create_free() is declared here, but the OP_CREATE
      * slot of rfsv4disptab uses nullfree; confirm whether it is still needed.
      */
 165 static void     nullfree(caddr_t);
 166 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 167                     struct compound_state *);
 168 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 169                     struct compound_state *);
 170 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 171                     struct compound_state *);
 172 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 173                     struct compound_state *);
 174 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 175                     struct compound_state *);
 176 static void     rfs4_op_create_free(nfs_resop4 *resop);
 177 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 178                     struct svc_req *, struct compound_state *);
 179 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 180                     struct svc_req *, struct compound_state *);
 181 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 182                     struct compound_state *);
 183 static void     rfs4_op_getattr_free(nfs_resop4 *);
 184 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185                     struct compound_state *);
 186 static void     rfs4_op_getfh_free(nfs_resop4 *);
 187 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 188                     struct compound_state *);
 189 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 190                     struct compound_state *);
 191 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192                     struct compound_state *);
 193 static void     lock_denied_free(nfs_resop4 *);
 194 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 195                     struct compound_state *);
 196 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 197                     struct compound_state *);
 198 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 199                     struct compound_state *);
 200 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 201                     struct compound_state *);
 202 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 203                     struct svc_req *req, struct compound_state *cs);
 204 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 205                     struct compound_state *);
 206 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 207                     struct compound_state *);
 208 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 209                     struct svc_req *, struct compound_state *);
 210 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 211                     struct svc_req *, struct compound_state *);
 212 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 213                     struct compound_state *);
 214 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 215                     struct compound_state *);
 216 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 217                     struct compound_state *);
 218 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 219                     struct compound_state *);
 220 static void     rfs4_op_read_free(nfs_resop4 *);
 221 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 222 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 223                     struct compound_state *);
 224 static void     rfs4_op_readlink_free(nfs_resop4 *);
 225 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 226                     struct svc_req *, struct compound_state *);
 227 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 228                     struct compound_state *);
 229 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 230                     struct compound_state *);
 231 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 232                     struct compound_state *);
 233 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 234                     struct compound_state *);
 235 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 236                     struct compound_state *);
 237 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 238                     struct compound_state *);
 239 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 240                     struct compound_state *);
 241 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 242                     struct compound_state *);
 243 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 244                     struct svc_req *, struct compound_state *);
 245 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 246                     struct svc_req *req, struct compound_state *);
 247 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 248                     struct compound_state *);
 249 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 250 
     /* Helpers that are not operation handlers. */
 251 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 252                     struct svc_req *);
 253 nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 254 void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 255 
 256 
 257 /*
 258  * translation table for attrs
      *
      * Set up by nfs4_ntov_table_init() and released by nfs4_ntov_table_free().
      * NOTE(review): the per-field notes below are inferred from the field
      * names and types only; the code that fills and reads the table is
      * outside this view, so confirm before relying on them.
 259  */
 260 struct nfs4_ntov_table {
             /* presumably the per-attribute value storage */
 261         union nfs4_attr_u *na;
             /* one byte per slot, NFS4_MAXNUM_ATTRS slots */
 262         uint8_t amap[NFS4_MAXNUM_ATTRS];
             /* presumably the number of slots in use */
 263         int attrcnt;
             /* presumably set when file system statistics are needed */
 264         bool_t vfsstat;
 265 };
 266 
     /* Setup and teardown of a struct nfs4_ntov_table. */
 267 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 268 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 269                     struct nfs4_svgetit_arg *sargp);
 270 
 271 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 272                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 273                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 274 
     /* Called from rfs4_do_server_start() on a warm start of a clustered server. */
 275 static void     hanfsv4_failover(nfs4_srv_t *);
 276 
     /*
      * File event monitor handles built by rfs4_srvrinit() from
      * nfs4_rd_deleg_tmpl and nfs4_wr_deleg_tmpl, and released by
      * rfs4_srvrfini().
      */
 277 fem_t           *deleg_rdops;
 278 fem_t           *deleg_wrops;
 279 
 280 /*
 281  * NFS4 op dispatch table
 282  */
 283 
     /*
      * One dispatch entry; rfsv4disptab holds one of these per operation
      * number.  Both function pointers are declared without a parameter list,
      * so calls through them are not type-checked by the compiler.
      */
 284 struct rfsv4disp {
 285         void    (*dis_proc)();          /* proc to call */
 286         void    (*dis_resfree)();       /* frees space allocated by proc */
 287         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 288 };
 289 
 290 static struct rfsv4disp rfsv4disptab[] = {
 291         /*
 292          * NFS VERSION 4
 293          */
 294 
 295         /* RFS_NULL = 0 */
 296         {rfs4_op_illegal, nullfree, 0},
 297 
 298         /* UNUSED = 1 */
 299         {rfs4_op_illegal, nullfree, 0},
 300 
 301         /* UNUSED = 2 */
 302         {rfs4_op_illegal, nullfree, 0},
 303 
 304         /* OP_ACCESS = 3 */
 305         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 306 
 307         /* OP_CLOSE = 4 */
 308         {rfs4_op_close, nullfree, 0},
 309 
 310         /* OP_COMMIT = 5 */
 311         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 312 
 313         /* OP_CREATE = 6 */
 314         {rfs4_op_create, nullfree, 0},
 315 
 316         /* OP_DELEGPURGE = 7 */
 317         {rfs4_op_delegpurge, nullfree, 0},
 318 
 319         /* OP_DELEGRETURN = 8 */
 320         {rfs4_op_delegreturn, nullfree, 0},
 321 
 322         /* OP_GETATTR = 9 */
 323         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 324 
 325         /* OP_GETFH = 10 */
 326         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 327 
 328         /* OP_LINK = 11 */
 329         {rfs4_op_link, nullfree, 0},
 330 
 331         /* OP_LOCK = 12 */
 332         {rfs4_op_lock, lock_denied_free, 0},
 333 
 334         /* OP_LOCKT = 13 */
 335         {rfs4_op_lockt, lock_denied_free, 0},
 336 
 337         /* OP_LOCKU = 14 */
 338         {rfs4_op_locku, nullfree, 0},
 339 
 340         /* OP_LOOKUP = 15 */
 341         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 342 
 343         /* OP_LOOKUPP = 16 */
 344         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 345 
 346         /* OP_NVERIFY = 17 */
 347         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 348 
 349         /* OP_OPEN = 18 */
 350         {rfs4_op_open, rfs4_free_reply, 0},
 351 
 352         /* OP_OPENATTR = 19 */
 353         {rfs4_op_openattr, nullfree, 0},
 354 
 355         /* OP_OPEN_CONFIRM = 20 */
 356         {rfs4_op_open_confirm, nullfree, 0},
 357 
 358         /* OP_OPEN_DOWNGRADE = 21 */
 359         {rfs4_op_open_downgrade, nullfree, 0},
 360 
 361         /* OP_OPEN_PUTFH = 22 */
 362         {rfs4_op_putfh, nullfree, RPC_ALL},
 363 
 364         /* OP_PUTPUBFH = 23 */
 365         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 366 
 367         /* OP_PUTROOTFH = 24 */
 368         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 369 
 370         /* OP_READ = 25 */
 371         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 372 
 373         /* OP_READDIR = 26 */
 374         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 375 
 376         /* OP_READLINK = 27 */
 377         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 378 
 379         /* OP_REMOVE = 28 */
 380         {rfs4_op_remove, nullfree, 0},
 381 
 382         /* OP_RENAME = 29 */
 383         {rfs4_op_rename, nullfree, 0},
 384 
 385         /* OP_RENEW = 30 */
 386         {rfs4_op_renew, nullfree, 0},
 387 
 388         /* OP_RESTOREFH = 31 */
 389         {rfs4_op_restorefh, nullfree, RPC_ALL},
 390 
 391         /* OP_SAVEFH = 32 */
 392         {rfs4_op_savefh, nullfree, RPC_ALL},
 393 
 394         /* OP_SECINFO = 33 */
 395         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 396 
 397         /* OP_SETATTR = 34 */
 398         {rfs4_op_setattr, nullfree, 0},
 399 
 400         /* OP_SETCLIENTID = 35 */
 401         {rfs4_op_setclientid, nullfree, 0},
 402 
 403         /* OP_SETCLIENTID_CONFIRM = 36 */
 404         {rfs4_op_setclientid_confirm, nullfree, 0},
 405 
 406         /* OP_VERIFY = 37 */
 407         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 408 
 409         /* OP_WRITE = 38 */
 410         {rfs4_op_write, nullfree, 0},
 411 
 412         /* OP_RELEASE_LOCKOWNER = 39 */
 413         {rfs4_op_release_lockowner, nullfree, 0},
 414 };
 415 
     /* Number of entries in rfsv4disptab, computed from the array itself. */
 416 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 417 
     /*
      * One past the last valid rfsv4disptab index.  In DEBUG builds
      * rfs4_op_string[] has one extra trailing "rfs4_op_illegal" entry that
      * lines up with this index.
      */
 418 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 419 
 420 #ifdef DEBUG
 421 
     /* DEBUG-only switches; the code that reads them is outside this view. */
 422 int             rfs4_fillone_debug = 0;
 423 int             rfs4_no_stub_access = 1;
 424 int             rfs4_rddir_debug = 0;
 425 
     /*
      * Printable handler names in the same order as rfsv4disptab, followed by
      * one extra entry ("rfs4_op_illegal") at the OP_ILLEGAL_IDX position.
      *
      * NOTE(review): the name for operation 36 reads "rfs4_op_setclient_confirm"
      * while the handler is rfs4_op_setclientid_confirm.  The string is left
      * alone because it is emitted at runtime; confirm before changing it.
      */
 426 static char    *rfs4_op_string[] = {
 427         "rfs4_op_null",
 428         "rfs4_op_1 unused",
 429         "rfs4_op_2 unused",
 430         "rfs4_op_access",
 431         "rfs4_op_close",
 432         "rfs4_op_commit",
 433         "rfs4_op_create",
 434         "rfs4_op_delegpurge",
 435         "rfs4_op_delegreturn",
 436         "rfs4_op_getattr",
 437         "rfs4_op_getfh",
 438         "rfs4_op_link",
 439         "rfs4_op_lock",
 440         "rfs4_op_lockt",
 441         "rfs4_op_locku",
 442         "rfs4_op_lookup",
 443         "rfs4_op_lookupp",
 444         "rfs4_op_nverify",
 445         "rfs4_op_open",
 446         "rfs4_op_openattr",
 447         "rfs4_op_open_confirm",
 448         "rfs4_op_open_downgrade",
 449         "rfs4_op_putfh",
 450         "rfs4_op_putpubfh",
 451         "rfs4_op_putrootfh",
 452         "rfs4_op_read",
 453         "rfs4_op_readdir",
 454         "rfs4_op_readlink",
 455         "rfs4_op_remove",
 456         "rfs4_op_rename",
 457         "rfs4_op_renew",
 458         "rfs4_op_restorefh",
 459         "rfs4_op_savefh",
 460         "rfs4_op_secinfo",
 461         "rfs4_op_setattr",
 462         "rfs4_op_setclientid",
 463         "rfs4_op_setclient_confirm",
 464         "rfs4_op_verify",
 465         "rfs4_op_write",
 466         "rfs4_op_release_lockowner",
 467         "rfs4_op_illegal"
 468 };
 469 #endif
 470 
     /* Declarations of routines defined elsewhere. */
 471 void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 472 
     /* Used by rfs4_dss_newpath() to copy a path string. */
 473 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 474 
 475 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 476 
     /*
      * Step from one dirent64 to the one that follows it in the same buffer by
      * advancing d_reclen bytes.  Any earlier definition of nextdp is replaced.
      * Note that dp is evaluated twice, so it must not have side effects.
      */
 477 #ifdef  nextdp
 478 #undef nextdp
 479 #endif
 480 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 481 
 482 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 483         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 484         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 485         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 486         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 487         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 488         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 489         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 490         NULL,                   NULL
 491 };
 492 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 493         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 494         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 495         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 496         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 497         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 498         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 499         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 500         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 501         NULL,                   NULL
 502 };
 503 
 504 /* ARGSUSED */
 505 static void *
 506 rfs4_zone_init(zoneid_t zoneid)
 507 {
 508         nfs4_srv_t *nsrv4;
 509         timespec32_t verf;
 510 
 511         nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 512 
 513         /*
 514          * The following algorithm attempts to find a unique verifier
 515          * to be used as the write verifier returned from the server
 516          * to the client.  It is important that this verifier change
 517          * whenever the server reboots.  Of secondary importance, it
 518          * is important for the verifier to be unique between two
 519          * different servers.
 520          *
 521          * Thus, an attempt is made to use the system hostid and the
 522          * current time in seconds when the nfssrv kernel module is
 523          * loaded.  It is assumed that an NFS server will not be able
 524          * to boot and then to reboot in less than a second.  If the
 525          * hostid has not been set, then the current high resolution
 526          * time is used.  This will ensure different verifiers each
 527          * time the server reboots and minimize the chances that two
 528          * different servers will have the same verifier.
 529          * XXX - this is broken on LP64 kernels.
 530          */
 531         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 532         if (verf.tv_sec != 0) {
 533                 verf.tv_nsec = gethrestime_sec();
 534         } else {
 535                 timespec_t tverf;
 536 
 537                 gethrestime(&tverf);
 538                 verf.tv_sec = (time_t)tverf.tv_sec;
 539                 verf.tv_nsec = tverf.tv_nsec;
 540         }
 541         nsrv4->write4verf = *(uint64_t *)&verf;
 542 
 543         /* Used to manage create/destroy of server state */
 544         nsrv4->nfs4_server_state = NULL;
 545         nsrv4->nfs4_cur_servinst = NULL;
 546         nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 547         mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 548         mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 549         mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 550         rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 551 
 552         return (nsrv4);
 553 }
 554 
 555 /* ARGSUSED */
 556 static void
 557 rfs4_zone_fini(zoneid_t zoneid, void *data)
 558 {
 559         nfs4_srv_t *nsrv4 = data;
 560 
 561         mutex_destroy(&nsrv4->deleg_lock);
 562         mutex_destroy(&nsrv4->state_lock);
 563         mutex_destroy(&nsrv4->servinst_lock);
 564         rw_destroy(&nsrv4->deleg_policy_lock);
 565 
 566         kmem_free(nsrv4, sizeof (*nsrv4));
 567 }
 568 
 569 void
 570 rfs4_srvrinit(void)
 571 {
 572         extern void rfs4_attr_init();
 573 
 574         zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
 575 
 576         rfs4_attr_init();
 577 
 578 
 579         if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 580                 rfs4_disable_delegation();
 581         } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 582             &deleg_wrops) != 0) {
 583                 rfs4_disable_delegation();
 584                 fem_free(deleg_rdops);
 585         }
 586 
 587         nfs4_srv_caller_id = fs_new_caller_id();
 588         lockt_sysid = lm_alloc_sysidt();
 589         vsd_create(&nfs4_srv_vkey, NULL);
 590         rfs4_state_g_init();
 591 }
 592 
 593 void
 594 rfs4_srvrfini(void)
 595 {
 596         if (lockt_sysid != LM_NOSYSID) {
 597                 lm_free_sysidt(lockt_sysid);
 598                 lockt_sysid = LM_NOSYSID;
 599         }
 600 
 601         rfs4_state_g_fini();
 602 
 603         fem_free(deleg_rdops);
 604         fem_free(deleg_wrops);
 605 
 606         (void) zone_key_delete(rfs4_zone_key);
 607 }
 608 
 609 void
 610 rfs4_do_server_start(int server_upordown,
 611     int srv_delegation, int cluster_booted)
 612 {
 613         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
 614 
 615         /* Is this a warm start? */
 616         if (server_upordown == NFS_SERVER_QUIESCED) {
 617                 cmn_err(CE_NOTE, "nfs4_srv: "
 618                     "server was previously quiesced; "
 619                     "existing NFSv4 state will be re-used");
 620 
 621                 /*
 622                  * HA-NFSv4: this is also the signal
 623                  * that a Resource Group failover has
 624                  * occurred.
 625                  */
 626                 if (cluster_booted)
 627                         hanfsv4_failover(nsrv4);
 628         } else {
 629                 /* Cold start */
 630                 nsrv4->rfs4_start_time = 0;
 631                 rfs4_state_zone_init(nsrv4);
 632                 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 633                     nfs4_drc_hash);
 634 
 635                 /*
 636                  * The nfsd service was started with the -s option
 637                  * we need to pull in any state from the paths indicated.
 638                  */
 639                 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 640                         /* read in the stable storage state from these paths */
 641                         rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 642                             rfs4_dss_newpaths);
 643                 }
 644         }
 645 
 646         /* Check if delegation is to be enabled */
 647         if (srv_delegation != FALSE)
 648                 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 649 }
 650 
 651 void
 652 rfs4_init_compound_state(struct compound_state *cs)
 653 {
 654         bzero(cs, sizeof (*cs));
 655         cs->cont = TRUE;
 656         cs->access = CS_ACCESS_DENIED;
 657         cs->deleg = FALSE;
 658         cs->mandlock = FALSE;
 659         cs->fh.nfs_fh4_val = cs->fhbuf;
 660 }
 661 
 662 void
 663 rfs4_grace_start(rfs4_servinst_t *sip)
 664 {
 665         rw_enter(&sip->rwlock, RW_WRITER);
 666         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 667         sip->grace_period = rfs4_grace_period;
 668         rw_exit(&sip->rwlock);
 669 }
 670 
 671 /*
 672  * returns true if the instance's grace period has never been started
 673  */
 674 int
 675 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 676 {
 677         time_t start_time;
 678 
 679         rw_enter(&sip->rwlock, RW_READER);
 680         start_time = sip->start_time;
 681         rw_exit(&sip->rwlock);
 682 
 683         return (start_time == 0);
 684 }
 685 
 686 /*
 687  * Indicates if server instance is within the
 688  * grace period.
 689  */
 690 int
 691 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 692 {
 693         time_t grace_expiry;
 694 
 695         rw_enter(&sip->rwlock, RW_READER);
 696         grace_expiry = sip->start_time + sip->grace_period;
 697         rw_exit(&sip->rwlock);
 698 
 699         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 700 }
 701 
 702 int
 703 rfs4_clnt_in_grace(rfs4_client_t *cp)
 704 {
 705         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 706 
 707         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 708 }
 709 
 710 /*
 711  * reset all currently active grace periods
 712  */
 713 void
 714 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 715 {
 716         rfs4_servinst_t *sip;
 717 
 718         mutex_enter(&nsrv4->servinst_lock);
 719         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 720                 if (rfs4_servinst_in_grace(sip))
 721                         rfs4_grace_start(sip);
 722         mutex_exit(&nsrv4->servinst_lock);
 723 }
 724 
 725 /*
 726  * start any new instances' grace periods
 727  */
 728 void
 729 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 730 {
 731         rfs4_servinst_t *sip;
 732 
 733         mutex_enter(&nsrv4->servinst_lock);
 734         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 735                 if (rfs4_servinst_grace_new(sip))
 736                         rfs4_grace_start(sip);
 737         mutex_exit(&nsrv4->servinst_lock);
 738 }
 739 
 740 static rfs4_dss_path_t *
 741 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 742     char *path, unsigned index)
 743 {
 744         size_t len;
 745         rfs4_dss_path_t *dss_path;
 746 
 747         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 748 
 749         /*
 750          * Take a copy of the string, since the original may be overwritten.
 751          * Sadly, no strdup() in the kernel.
 752          */
 753         /* allow for NUL */
 754         len = strlen(path) + 1;
 755         dss_path->path = kmem_alloc(len, KM_SLEEP);
 756         (void) strlcpy(dss_path->path, path, len);
 757 
 758         /* associate with servinst */
 759         dss_path->sip = sip;
 760         dss_path->index = index;
 761 
 762         /*
 763          * Add to list of served paths.
 764          * No locking required, as we're only ever called at startup.
 765          */
 766         if (nsrv4->dss_pathlist == NULL) {
 767                 /* this is the first dss_path_t */
 768 
 769                 /* needed for insque/remque */
 770                 dss_path->next = dss_path->prev = dss_path;
 771 
 772                 nsrv4->dss_pathlist = dss_path;
 773         } else {
 774                 insque(dss_path, nsrv4->dss_pathlist);
 775         }
 776 
 777         return (dss_path);
 778 }
 779 
 780 /*
 781  * Create a new server instance, and make it the currently active instance.
 782  * Note that starting the grace period too early will reduce the clients'
 783  * recovery window.
 784  */
 785 void
 786 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 787     int dss_npaths, char **dss_paths)
 788 {
 789         unsigned i;
 790         rfs4_servinst_t *sip;
 791         rfs4_oldstate_t *oldstate;
 792 
 793         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 794         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 795 
 796         sip->start_time = (time_t)0;
 797         sip->grace_period = (time_t)0;
 798         sip->next = NULL;
 799         sip->prev = NULL;
 800 
 801         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 802         /*
 803          * This initial dummy entry is required to setup for insque/remque.
 804          * It must be skipped over whenever the list is traversed.
 805          */
 806         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 807         /* insque/remque require initial list entry to be self-terminated */
 808         oldstate->next = oldstate;
 809         oldstate->prev = oldstate;
 810         sip->oldstate = oldstate;
 811 
 812 
 813         sip->dss_npaths = dss_npaths;
 814         sip->dss_paths = kmem_alloc(dss_npaths *
 815             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 816 
 817         for (i = 0; i < dss_npaths; i++) {
 818                 sip->dss_paths[i] =
 819                     rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 820         }
 821 
 822         mutex_enter(&nsrv4->servinst_lock);
 823         if (nsrv4->nfs4_cur_servinst != NULL) {
 824                 /* add to linked list */
 825                 sip->prev = nsrv4->nfs4_cur_servinst;
 826                 nsrv4->nfs4_cur_servinst->next = sip;
 827         }
 828         if (start_grace)
 829                 rfs4_grace_start(sip);
 830         /* make the new instance "current" */
 831         nsrv4->nfs4_cur_servinst = sip;
 832 
 833         mutex_exit(&nsrv4->servinst_lock);
 834 }
 835 
 836 /*
 837  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 838  * all instances directly.
 839  */
 840 void
 841 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 842 {
 843         rfs4_servinst_t *sip, *prev, *current;
 844 #ifdef DEBUG
 845         int n = 0;
 846 #endif
 847 
 848         mutex_enter(&nsrv4->servinst_lock);
 849         ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 850         current = nsrv4->nfs4_cur_servinst;
 851         nsrv4->nfs4_cur_servinst = NULL;
 852         for (sip = current; sip != NULL; sip = prev) {
 853                 prev = sip->prev;
 854                 rw_destroy(&sip->rwlock);
 855                 if (sip->oldstate)
 856                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 857                 if (sip->dss_paths) {
 858                         int i = sip->dss_npaths;
 859 
 860                         while (i > 0) {
 861                                 i--;
 862                                 if (sip->dss_paths[i] != NULL) {
 863                                         char *path = sip->dss_paths[i]->path;
 864 
 865                                         if (path != NULL) {
 866                                                 kmem_free(path,
 867                                                     strlen(path) + 1);
 868                                         }
 869                                         kmem_free(sip->dss_paths[i],
 870                                             sizeof (rfs4_dss_path_t));
 871                                 }
 872                         }
 873                         kmem_free(sip->dss_paths,
 874                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 875                 }
 876                 kmem_free(sip, sizeof (rfs4_servinst_t));
 877 #ifdef DEBUG
 878                 n++;
 879 #endif
 880         }
 881         mutex_exit(&nsrv4->servinst_lock);
 882 }
 883 
 884 /*
 885  * Assign the current server instance to a client_t.
 886  * Should be called with cp->rc_dbe held.
 887  */
 888 void
 889 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 890     rfs4_servinst_t *sip)
 891 {
 892         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 893 
 894         /*
 895          * The lock ensures that if the current instance is in the process
 896          * of changing, we will see the new one.
 897          */
 898         mutex_enter(&nsrv4->servinst_lock);
 899         cp->rc_server_instance = sip;
 900         mutex_exit(&nsrv4->servinst_lock);
 901 }
 902 
 903 rfs4_servinst_t *
 904 rfs4_servinst(rfs4_client_t *cp)
 905 {
 906         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 907 
 908         return (cp->rc_server_instance);
 909 }
 910 
 911 /* ARGSUSED */
 912 static void
 913 nullfree(caddr_t resop)
 914 {
 915 }
 916 
 917 /*
 918  * This is a fall-through for invalid or not implemented (yet) ops
 919  */
 920 /* ARGSUSED */
 921 static void
 922 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 923     struct compound_state *cs)
 924 {
 925         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 926 }
 927 
 928 /*
 929  * Check if the security flavor, nfsnum, is in the flavor_list.
 930  */
 931 bool_t
 932 in_flavor_list(int nfsnum, int *flavor_list, int count)
 933 {
 934         int i;
 935 
 936         for (i = 0; i < count; i++) {
 937                 if (nfsnum == flavor_list[i])
 938                         return (TRUE);
 939         }
 940         return (FALSE);
 941 }
 942 
 943 /*
 944  * Used by rfs4_op_secinfo to get the security information from the
 945  * export structure associated with the component.
 946  */
 947 /* ARGSUSED */
 948 static nfsstat4
 949 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 950 {
 951         int error, different_export = 0;
 952         vnode_t *dvp, *vp;
 953         struct exportinfo *exi = NULL;
 954         fid_t fid;
 955         uint_t count, i;
 956         secinfo4 *resok_val;
 957         struct secinfo *secp;
 958         seconfig_t *si;
 959         bool_t did_traverse = FALSE;
 960         int dotdot, walk;
 961         nfs_export_t *ne = nfs_get_export();
 962 
 963         dvp = cs->vp;
 964         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 965 
 966         /*
 967          * If dotdotting, then need to check whether it's above the
 968          * root of a filesystem, or above an export point.
 969          */
 970         if (dotdot) {
 971 
 972                 /*
 973                  * If dotdotting at the root of a filesystem, then
 974                  * need to traverse back to the mounted-on filesystem
 975                  * and do the dotdot lookup there.
 976                  */
 977                 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
 978 
 979                         /*
 980                          * If at the system root, then can
 981                          * go up no further.
 982                          */
 983                         if (VN_CMP(dvp, ZONE_ROOTVP()))
 984                                 return (puterrno4(ENOENT));
 985 
 986                         /*
 987                          * Traverse back to the mounted-on filesystem
 988                          */
 989                         dvp = untraverse(cs->vp);
 990 
 991                         /*
 992                          * Set the different_export flag so we remember
 993                          * to pick up a new exportinfo entry for
 994                          * this new filesystem.
 995                          */
 996                         different_export = 1;
 997                 } else {
 998 
 999                         /*
1000                          * If dotdotting above an export point then set
1001                          * the different_export to get new export info.
1002                          */
1003                         different_export = nfs_exported(cs->exi, cs->vp);
1004                 }
1005         }
1006 
1007         /*
1008          * Get the vnode for the component "nm".
1009          */
1010         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1011             NULL, NULL, NULL);
1012         if (error)
1013                 return (puterrno4(error));
1014 
1015         /*
1016          * If the vnode is in a pseudo filesystem, or if the security flavor
1017          * used in the request is valid but not an explicitly shared flavor,
1018          * or the access bit indicates that this is a limited access,
1019          * check whether this vnode is visible.
1020          */
1021         if (!different_export &&
1022             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1023             cs->access & CS_ACCESS_LIMITED)) {
1024                 if (! nfs_visible(cs->exi, vp, &different_export)) {
1025                         VN_RELE(vp);
1026                         return (puterrno4(ENOENT));
1027                 }
1028         }
1029 
1030         /*
1031          * If it's a mountpoint, then traverse it.
1032          */
1033         if (vn_ismntpt(vp)) {
1034                 if ((error = traverse(&vp)) != 0) {
1035                         VN_RELE(vp);
1036                         return (puterrno4(error));
1037                 }
1038                 /* remember that we had to traverse mountpoint */
1039                 did_traverse = TRUE;
1040                 different_export = 1;
1041         } else if (vp->v_vfsp != dvp->v_vfsp) {
1042                 /*
1043                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1044                  * then vp is probably an LOFS object.  We don't need the
1045                  * realvp, we just need to know that we might have crossed
1046                  * a server fs boundary and need to call checkexport4.
1047                  * (LOFS lookup hides server fs mountpoints, and actually calls
1048                  * traverse)
1049                  */
1050                 different_export = 1;
1051         }
1052 
1053         /*
1054          * Get the export information for it.
1055          */
1056         if (different_export) {
1057 
1058                 bzero(&fid, sizeof (fid));
1059                 fid.fid_len = MAXFIDSZ;
1060                 error = vop_fid_pseudo(vp, &fid);
1061                 if (error) {
1062                         VN_RELE(vp);
1063                         return (puterrno4(error));
1064                 }
1065 
1066                 if (dotdot)
1067                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1068                 else
1069                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1070 
1071                 if (exi == NULL) {
1072                         if (did_traverse == TRUE) {
1073                                 /*
1074                                  * If this vnode is a mounted-on vnode,
1075                                  * but the mounted-on file system is not
1076                                  * exported, send back the secinfo for
1077                                  * the exported node that the mounted-on
1078                                  * vnode lives in.
1079                                  */
1080                                 exi = cs->exi;
1081                         } else {
1082                                 VN_RELE(vp);
1083                                 return (puterrno4(EACCES));
1084                         }
1085                 }
1086         } else {
1087                 exi = cs->exi;
1088         }
1089         ASSERT(exi != NULL);
1090 
1091 
1092         /*
1093          * Create the secinfo result based on the security information
1094          * from the exportinfo structure (exi).
1095          *
1096          * Return all flavors for a pseudo node.
1097          * For a real export node, return the flavor that the client
1098          * has access with.
1099          */
1100         ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1101         if (PSEUDO(exi)) {
1102                 count = exi->exi_export.ex_seccnt; /* total sec count */
1103                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1104                 secp = exi->exi_export.ex_secinfo;
1105 
1106                 for (i = 0; i < count; i++) {
1107                         si = &secp[i].s_secinfo;
1108                         resok_val[i].flavor = si->sc_rpcnum;
1109                         if (resok_val[i].flavor == RPCSEC_GSS) {
1110                                 rpcsec_gss_info *info;
1111 
1112                                 info = &resok_val[i].flavor_info;
1113                                 info->qop = si->sc_qop;
1114                                 info->service = (rpc_gss_svc_t)si->sc_service;
1115 
1116                                 /* get oid opaque data */
1117                                 info->oid.sec_oid4_len =
1118                                     si->sc_gss_mech_type->length;
1119                                 info->oid.sec_oid4_val = kmem_alloc(
1120                                     si->sc_gss_mech_type->length, KM_SLEEP);
1121                                 bcopy(
1122                                     si->sc_gss_mech_type->elements,
1123                                     info->oid.sec_oid4_val,
1124                                     info->oid.sec_oid4_len);
1125                         }
1126                 }
1127                 resp->SECINFO4resok_len = count;
1128                 resp->SECINFO4resok_val = resok_val;
1129         } else {
1130                 int ret_cnt = 0, k = 0;
1131                 int *flavor_list;
1132 
1133                 count = exi->exi_export.ex_seccnt; /* total sec count */
1134                 secp = exi->exi_export.ex_secinfo;
1135 
1136                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1137                 /* find out which flavors to return */
1138                 for (i = 0; i < count; i ++) {
1139                         int access, flavor, perm;
1140 
1141                         flavor = secp[i].s_secinfo.sc_nfsnum;
1142                         perm = secp[i].s_flags;
1143 
1144                         access = nfsauth4_secinfo_access(exi, cs->req,
1145                             flavor, perm, cs->basecr);
1146 
1147                         if (! (access & NFSAUTH_DENIED) &&
1148                             ! (access & NFSAUTH_WRONGSEC)) {
1149                                 flavor_list[ret_cnt] = flavor;
1150                                 ret_cnt++;
1151                         }
1152                 }
1153 
1154                 /* Create the returning SECINFO value */
1155                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1156 
1157                 for (i = 0; i < count; i++) {
1158                         /*
1159                          * If the flavor is in the flavor list,
1160                          * fill in resok_val.
1161                          */
1162                         si = &secp[i].s_secinfo;
1163                         if (in_flavor_list(si->sc_nfsnum,
1164                             flavor_list, ret_cnt)) {
1165                                 resok_val[k].flavor = si->sc_rpcnum;
1166                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1167                                         rpcsec_gss_info *info;
1168 
1169                                         info = &resok_val[k].flavor_info;
1170                                         info->qop = si->sc_qop;
1171                                         info->service = (rpc_gss_svc_t)
1172                                             si->sc_service;
1173 
1174                                         /* get oid opaque data */
1175                                         info->oid.sec_oid4_len =
1176                                             si->sc_gss_mech_type->length;
1177                                         info->oid.sec_oid4_val = kmem_alloc(
1178                                             si->sc_gss_mech_type->length,
1179                                             KM_SLEEP);
1180                                         bcopy(si->sc_gss_mech_type->elements,
1181                                             info->oid.sec_oid4_val,
1182                                             info->oid.sec_oid4_len);
1183                                 }
1184                                 k++;
1185                         }
1186                         if (k >= ret_cnt)
1187                                 break;
1188                 }
1189                 resp->SECINFO4resok_len = ret_cnt;
1190                 resp->SECINFO4resok_val = resok_val;
1191                 kmem_free(flavor_list, count * sizeof (int));
1192         }
1193 
1194         VN_RELE(vp);
1195         return (NFS4_OK);
1196 }
1197 
1198 /*
1199  * SECINFO (Operation 33): Obtain required security information on
1200  * the component name in the format of (security-mechanism-oid, qop, service)
1201  * triplets.
1202  */
1203 /* ARGSUSED */
1204 static void
1205 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1206     struct compound_state *cs)
1207 {
1208         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1209         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1210         utf8string *utfnm = &args->name;
1211         uint_t len;
1212         char *nm;
1213         struct sockaddr *ca;
1214         char *name = NULL;
1215         nfsstat4 status = NFS4_OK;
1216 
1217         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1218             SECINFO4args *, args);
1219 
1220         /*
1221          * Current file handle (cfh) should have been set before getting
1222          * into this function. If not, return error.
1223          */
1224         if (cs->vp == NULL) {
1225                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1226                 goto out;
1227         }
1228 
1229         if (cs->vp->v_type != VDIR) {
1230                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1231                 goto out;
1232         }
1233 
1234         /*
1235          * Verify the component name. If failed, error out, but
1236          * do not error out if the component name is a "..".
1237          * SECINFO will return its parents secinfo data for SECINFO "..".
1238          */
1239         status = utf8_dir_verify(utfnm);
1240         if (status != NFS4_OK) {
1241                 if (utfnm->utf8string_len != 2 ||
1242                     utfnm->utf8string_val[0] != '.' ||
1243                     utfnm->utf8string_val[1] != '.') {
1244                         *cs->statusp = resp->status = status;
1245                         goto out;
1246                 }
1247         }
1248 
1249         nm = utf8_to_str(utfnm, &len, NULL);
1250         if (nm == NULL) {
1251                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1252                 goto out;
1253         }
1254 
1255         if (len > MAXNAMELEN) {
1256                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1257                 kmem_free(nm, len);
1258                 goto out;
1259         }
1260 
1261         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1262         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1263             MAXPATHLEN  + 1);
1264 
1265         if (name == NULL) {
1266                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1267                 kmem_free(nm, len);
1268                 goto out;
1269         }
1270 
1271 
1272         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1273 
1274         if (name != nm)
1275                 kmem_free(name, MAXPATHLEN + 1);
1276         kmem_free(nm, len);
1277 
1278 out:
1279         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1280             SECINFO4res *, resp);
1281 }
1282 
1283 /*
1284  * Free SECINFO result.
1285  */
1286 /* ARGSUSED */
1287 static void
1288 rfs4_op_secinfo_free(nfs_resop4 *resop)
1289 {
1290         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1291         int count, i;
1292         secinfo4 *resok_val;
1293 
1294         /* If this is not an Ok result, nothing to free. */
1295         if (resp->status != NFS4_OK) {
1296                 return;
1297         }
1298 
1299         count = resp->SECINFO4resok_len;
1300         resok_val = resp->SECINFO4resok_val;
1301 
1302         for (i = 0; i < count; i++) {
1303                 if (resok_val[i].flavor == RPCSEC_GSS) {
1304                         rpcsec_gss_info *info;
1305 
1306                         info = &resok_val[i].flavor_info;
1307                         kmem_free(info->oid.sec_oid4_val,
1308                             info->oid.sec_oid4_len);
1309                 }
1310         }
1311         kmem_free(resok_val, count * sizeof (secinfo4));
1312         resp->SECINFO4resok_len = 0;
1313         resp->SECINFO4resok_val = NULL;
1314 }
1315 
1316 /* ARGSUSED */
1317 static void
1318 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1319     struct compound_state *cs)
1320 {
1321         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1322         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1323         int error;
1324         vnode_t *vp;
1325         struct vattr va;
1326         int checkwriteperm;
1327         cred_t *cr = cs->cr;
1328         bslabel_t *clabel, *slabel;
1329         ts_label_t *tslabel;
1330         boolean_t admin_low_client;
1331 
1332         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1333             ACCESS4args *, args);
1334 
1335 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1336         if (cs->access == CS_ACCESS_DENIED) {
1337                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1338                 goto out;
1339         }
1340 #endif
1341         if (cs->vp == NULL) {
1342                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1343                 goto out;
1344         }
1345 
1346         ASSERT(cr != NULL);
1347 
1348         vp = cs->vp;
1349 
1350         /*
1351          * If the file system is exported read only, it is not appropriate
1352          * to check write permissions for regular files and directories.
1353          * Special files are interpreted by the client, so the underlying
1354          * permissions are sent back to the client for interpretation.
1355          */
1356         if (rdonly4(req, cs) &&
1357             (vp->v_type == VREG || vp->v_type == VDIR))
1358                 checkwriteperm = 0;
1359         else
1360                 checkwriteperm = 1;
1361 
1362         /*
1363          * XXX
1364          * We need the mode so that we can correctly determine access
1365          * permissions relative to a mandatory lock file.  Access to
1366          * mandatory lock files is denied on the server, so it might
1367          * as well be reflected to the server during the open.
1368          */
1369         va.va_mask = AT_MODE;
1370         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1371         if (error) {
1372                 *cs->statusp = resp->status = puterrno4(error);
1373                 goto out;
1374         }
1375         resp->access = 0;
1376         resp->supported = 0;
1377 
1378         if (is_system_labeled()) {
1379                 ASSERT(req->rq_label != NULL);
1380                 clabel = req->rq_label;
1381                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1382                     "got client label from request(1)",
1383                     struct svc_req *, req);
1384                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1385                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1386                                 *cs->statusp = resp->status = puterrno4(EACCES);
1387                                 goto out;
1388                         }
1389                         slabel = label2bslabel(tslabel);
1390                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1391                             char *, "got server label(1) for vp(2)",
1392                             bslabel_t *, slabel, vnode_t *, vp);
1393 
1394                         admin_low_client = B_FALSE;
1395                 } else
1396                         admin_low_client = B_TRUE;
1397         }
1398 
1399         if (args->access & ACCESS4_READ) {
1400                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1401                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1402                     (!is_system_labeled() || admin_low_client ||
1403                     bldominates(clabel, slabel)))
1404                         resp->access |= ACCESS4_READ;
1405                 resp->supported |= ACCESS4_READ;
1406         }
1407         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1408                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1409                 if (!error && (!is_system_labeled() || admin_low_client ||
1410                     bldominates(clabel, slabel)))
1411                         resp->access |= ACCESS4_LOOKUP;
1412                 resp->supported |= ACCESS4_LOOKUP;
1413         }
1414         if (checkwriteperm &&
1415             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1416                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1417                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1418                     (!is_system_labeled() || admin_low_client ||
1419                     blequal(clabel, slabel)))
1420                         resp->access |=
1421                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1422                 resp->supported |=
1423                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1424         }
1425 
1426         if (checkwriteperm &&
1427             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1428                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1429                 if (!error && (!is_system_labeled() || admin_low_client ||
1430                     blequal(clabel, slabel)))
1431                         resp->access |= ACCESS4_DELETE;
1432                 resp->supported |= ACCESS4_DELETE;
1433         }
1434         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1435                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1436                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1437                     (!is_system_labeled() || admin_low_client ||
1438                     bldominates(clabel, slabel)))
1439                         resp->access |= ACCESS4_EXECUTE;
1440                 resp->supported |= ACCESS4_EXECUTE;
1441         }
1442 
1443         if (is_system_labeled() && !admin_low_client)
1444                 label_rele(tslabel);
1445 
1446         *cs->statusp = resp->status = NFS4_OK;
1447 out:
1448         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1449             ACCESS4res *, resp);
1450 }
1451 
1452 /* ARGSUSED */
1453 static void
1454 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1455     struct compound_state *cs)
1456 {
1457         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1458         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1459         int error;
1460         vnode_t *vp = cs->vp;
1461         cred_t *cr = cs->cr;
1462         vattr_t va;
1463         nfs4_srv_t *nsrv4;
1464 
1465         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1466             COMMIT4args *, args);
1467 
1468         if (vp == NULL) {
1469                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1470                 goto out;
1471         }
1472         if (cs->access == CS_ACCESS_DENIED) {
1473                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1474                 goto out;
1475         }
1476 
1477         if (args->offset + args->count < args->offset) {
1478                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1479                 goto out;
1480         }
1481 
1482         va.va_mask = AT_UID;
1483         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1484 
1485         /*
1486          * If we can't get the attributes, then we can't do the
1487          * right access checking.  So, we'll fail the request.
1488          */
1489         if (error) {
1490                 *cs->statusp = resp->status = puterrno4(error);
1491                 goto out;
1492         }
1493         if (rdonly4(req, cs)) {
1494                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1495                 goto out;
1496         }
1497 
1498         if (vp->v_type != VREG) {
1499                 if (vp->v_type == VDIR)
1500                         resp->status = NFS4ERR_ISDIR;
1501                 else
1502                         resp->status = NFS4ERR_INVAL;
1503                 *cs->statusp = resp->status;
1504                 goto out;
1505         }
1506 
1507         if (crgetuid(cr) != va.va_uid &&
1508             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1509                 *cs->statusp = resp->status = puterrno4(error);
1510                 goto out;
1511         }
1512 
1513         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1514 
1515         if (error) {
1516                 *cs->statusp = resp->status = puterrno4(error);
1517                 goto out;
1518         }
1519 
1520         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1521         *cs->statusp = resp->status = NFS4_OK;
1522         resp->writeverf = nsrv4->write4verf;
1523 out:
1524         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1525             COMMIT4res *, resp);
1526 }
1527 
1528 /*
1529  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1530  * was completed. It does the nfsv4 create for special files.
1531  */
1532 /* ARGSUSED */
1533 static vnode_t *
1534 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1535     struct compound_state *cs, vattr_t *vap, char *nm)
1536 {
1537         int error;
1538         cred_t *cr = cs->cr;
1539         vnode_t *dvp = cs->vp;
1540         vnode_t *vp = NULL;
1541         int mode;
1542         enum vcexcl excl;
1543 
1544         switch (args->type) {
1545         case NF4CHR:
1546         case NF4BLK:
1547                 if (secpolicy_sys_devices(cr) != 0) {
1548                         *cs->statusp = resp->status = NFS4ERR_PERM;
1549                         return (NULL);
1550                 }
1551                 if (args->type == NF4CHR)
1552                         vap->va_type = VCHR;
1553                 else
1554                         vap->va_type = VBLK;
1555                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1556                     args->ftype4_u.devdata.specdata2);
1557                 vap->va_mask |= AT_RDEV;
1558                 break;
1559         case NF4SOCK:
1560                 vap->va_type = VSOCK;
1561                 break;
1562         case NF4FIFO:
1563                 vap->va_type = VFIFO;
1564                 break;
1565         default:
1566                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1567                 return (NULL);
1568         }
1569 
1570         /*
1571          * Must specify the mode.
1572          */
1573         if (!(vap->va_mask & AT_MODE)) {
1574                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1575                 return (NULL);
1576         }
1577 
1578         excl = EXCL;
1579 
1580         mode = 0;
1581 
1582         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1583         if (error) {
1584                 *cs->statusp = resp->status = puterrno4(error);
1585                 return (NULL);
1586         }
1587         return (vp);
1588 }
1589 
1590 /*
1591  * nfsv4 create is used to create non-regular files. For regular files,
1592  * use nfsv4 open.
1593  */
1594 /* ARGSUSED */
1595 static void
1596 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1597     struct compound_state *cs)
1598 {
1599         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1600         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1601         int error;
1602         struct vattr bva, iva, iva2, ava, *vap;
1603         cred_t *cr = cs->cr;
1604         vnode_t *dvp = cs->vp;
1605         vnode_t *vp = NULL;
1606         vnode_t *realvp;
1607         char *nm, *lnm;
1608         uint_t len, llen;
1609         int syncval = 0;
1610         struct nfs4_svgetit_arg sarg;
1611         struct nfs4_ntov_table ntov;
1612         struct statvfs64 sb;
1613         nfsstat4 status;
1614         struct sockaddr *ca;
1615         char *name = NULL;
1616         char *lname = NULL;
1617 
1618         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1619             CREATE4args *, args);
1620 
1621         resp->attrset = 0;
1622 
1623         if (dvp == NULL) {
1624                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1625                 goto out;
1626         }
1627 
1628         /*
1629          * If there is an unshared filesystem mounted on this vnode,
1630          * do not allow to create an object in this directory.
1631          */
1632         if (vn_ismntpt(dvp)) {
1633                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1634                 goto out;
1635         }
1636 
1637         /* Verify that type is correct */
1638         switch (args->type) {
1639         case NF4LNK:
1640         case NF4BLK:
1641         case NF4CHR:
1642         case NF4SOCK:
1643         case NF4FIFO:
1644         case NF4DIR:
1645                 break;
1646         default:
1647                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1648                 goto out;
1649         };
1650 
1651         if (cs->access == CS_ACCESS_DENIED) {
1652                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1653                 goto out;
1654         }
1655         if (dvp->v_type != VDIR) {
1656                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1657                 goto out;
1658         }
1659         status = utf8_dir_verify(&args->objname);
1660         if (status != NFS4_OK) {
1661                 *cs->statusp = resp->status = status;
1662                 goto out;
1663         }
1664 
1665         if (rdonly4(req, cs)) {
1666                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1667                 goto out;
1668         }
1669 
1670         /*
1671          * Name of newly created object
1672          */
1673         nm = utf8_to_fn(&args->objname, &len, NULL);
1674         if (nm == NULL) {
1675                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1676                 goto out;
1677         }
1678 
1679         if (len > MAXNAMELEN) {
1680                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1681                 kmem_free(nm, len);
1682                 goto out;
1683         }
1684 
1685         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1686         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1687             MAXPATHLEN  + 1);
1688 
1689         if (name == NULL) {
1690                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1691                 kmem_free(nm, len);
1692                 goto out;
1693         }
1694 
1695         resp->attrset = 0;
1696 
1697         sarg.sbp = &sb;
1698         sarg.is_referral = B_FALSE;
1699         nfs4_ntov_table_init(&ntov);
1700 
1701         status = do_rfs4_set_attrs(&resp->attrset,
1702             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1703 
1704         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1705                 status = NFS4ERR_INVAL;
1706 
1707         if (status != NFS4_OK) {
1708                 *cs->statusp = resp->status = status;
1709                 if (name != nm)
1710                         kmem_free(name, MAXPATHLEN + 1);
1711                 kmem_free(nm, len);
1712                 nfs4_ntov_table_free(&ntov, &sarg);
1713                 resp->attrset = 0;
1714                 goto out;
1715         }
1716 
1717         /* Get "before" change value */
1718         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1719         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1720         if (error) {
1721                 *cs->statusp = resp->status = puterrno4(error);
1722                 if (name != nm)
1723                         kmem_free(name, MAXPATHLEN + 1);
1724                 kmem_free(nm, len);
1725                 nfs4_ntov_table_free(&ntov, &sarg);
1726                 resp->attrset = 0;
1727                 goto out;
1728         }
1729         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1730 
1731         vap = sarg.vap;
1732 
1733         /*
1734          * Set the default initial values for attributes when the parent
1735          * directory does not have the VSUID/VSGID bit set and they have
1736          * not been specified in createattrs.
1737          */
1738         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1739                 vap->va_uid = crgetuid(cr);
1740                 vap->va_mask |= AT_UID;
1741         }
1742         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1743                 vap->va_gid = crgetgid(cr);
1744                 vap->va_mask |= AT_GID;
1745         }
1746 
1747         vap->va_mask |= AT_TYPE;
1748         switch (args->type) {
1749         case NF4DIR:
1750                 vap->va_type = VDIR;
1751                 if ((vap->va_mask & AT_MODE) == 0) {
1752                         vap->va_mode = 0700; /* default: owner rwx only */
1753                         vap->va_mask |= AT_MODE;
1754                 }
1755                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1756                 if (error)
1757                         break;
1758 
1759                 /*
1760                  * Get the initial "after" sequence number, if it fails,
1761                  * set to zero
1762                  */
1763                 iva.va_mask = AT_SEQ;
1764                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1765                         iva.va_seq = 0;
1766                 break;
1767         case NF4LNK:
1768                 vap->va_type = VLNK;
1769                 if ((vap->va_mask & AT_MODE) == 0) {
1770                         vap->va_mode = 0700; /* default: owner rwx only */
1771                         vap->va_mask |= AT_MODE;
1772                 }
1773 
1774                 /*
1775                  * symlink names must be treated as data
1776                  */
1777                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1778                     &llen, NULL);
1779 
1780                 if (lnm == NULL) {
1781                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1782                         if (name != nm)
1783                                 kmem_free(name, MAXPATHLEN + 1);
1784                         kmem_free(nm, len);
1785                         nfs4_ntov_table_free(&ntov, &sarg);
1786                         resp->attrset = 0;
1787                         goto out;
1788                 }
1789 
1790                 if (llen > MAXPATHLEN) {
1791                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1792                         if (name != nm)
1793                                 kmem_free(name, MAXPATHLEN + 1);
1794                         kmem_free(nm, len);
1795                         kmem_free(lnm, llen);
1796                         nfs4_ntov_table_free(&ntov, &sarg);
1797                         resp->attrset = 0;
1798                         goto out;
1799                 }
1800 
1801                 lname = nfscmd_convname(ca, cs->exi, lnm,
1802                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1803 
1804                 if (lname == NULL) {
1805                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1806                         if (name != nm)
1807                                 kmem_free(name, MAXPATHLEN + 1);
1808                         kmem_free(nm, len);
1809                         kmem_free(lnm, llen);
1810                         nfs4_ntov_table_free(&ntov, &sarg);
1811                         resp->attrset = 0;
1812                         goto out;
1813                 }
1814 
1815                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1816                 if (lname != lnm)
1817                         kmem_free(lname, MAXPATHLEN + 1);
1818                 kmem_free(lnm, llen);
1819                 if (error)
1820                         break;
1821 
1822                 /*
1823                  * Get the initial "after" sequence number, if it fails,
1824                  * set to zero
1825                  */
1826                 iva.va_mask = AT_SEQ;
1827                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1828                         iva.va_seq = 0;
1829 
1830                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1831                     NULL, NULL, NULL);
1832                 if (error)
1833                         break;
1834 
1835                 /*
1836                  * va_seq is not safe over VOP calls, check it again
1837                  * if it has changed zero out iva to force atomic = FALSE.
1838                  */
1839                 iva2.va_mask = AT_SEQ;
1840                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1841                     iva2.va_seq != iva.va_seq)
1842                         iva.va_seq = 0;
1843                 break;
1844         default:
1845                 /*
1846                  * probably a special file.
1847                  */
1848                 if ((vap->va_mask & AT_MODE) == 0) {
1849                         vap->va_mode = 0600; /* default: owner rw only */
1850                         vap->va_mask |= AT_MODE;
1851                 }
1852                 syncval = FNODSYNC;
1853                 /*
1854                  * We know this will only generate one VOP call
1855                  */
1856                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1857 
1858                 if (vp == NULL) {
1859                         if (name != nm)
1860                                 kmem_free(name, MAXPATHLEN + 1);
1861                         kmem_free(nm, len);
1862                         nfs4_ntov_table_free(&ntov, &sarg);
1863                         resp->attrset = 0;
1864                         goto out;
1865                 }
1866 
1867                 /*
1868                  * Get the initial "after" sequence number, if it fails,
1869                  * set to zero
1870                  */
1871                 iva.va_mask = AT_SEQ;
1872                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1873                         iva.va_seq = 0;
1874 
1875                 break;
1876         }
1877         if (name != nm)
1878                 kmem_free(name, MAXPATHLEN + 1);
1879         kmem_free(nm, len);
1880 
1881         if (error) {
1882                 *cs->statusp = resp->status = puterrno4(error);
1883         }
1884 
1885         /*
1886          * Force modified data and metadata out to stable storage.
1887          */
1888         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1889 
1890         if (resp->status != NFS4_OK) {
1891                 if (vp != NULL)
1892                         VN_RELE(vp);
1893                 nfs4_ntov_table_free(&ntov, &sarg);
1894                 resp->attrset = 0;
1895                 goto out;
1896         }
1897 
1898         /*
1899          * Finish setup of cinfo response, "before" value already set.
1900          * Get "after" change value, if it fails, simply return the
1901          * before value.
1902          */
1903         ava.va_mask = AT_CTIME|AT_SEQ;
1904         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1905                 ava.va_ctime = bva.va_ctime;
1906                 ava.va_seq = 0;
1907         }
1908         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1909 
1910         /*
1911          * True verification that object was created with correct
1912          * attrs is impossible.  The attrs could have been changed
1913          * immediately after object creation.  If attributes did
1914          * not verify, the only recourse for the server is to
1915          * destroy the object.  Maybe if some attrs (like gid)
1916          * are set incorrectly, the object should be destroyed;
1917          * however, seems bad as a default policy.  Do we really
1918          * want to destroy an object over one of the times not
1919          * verifying correctly?  For these reasons, the server
1920          * currently sets bits in attrset for createattrs
1921          * that were set; however, no verification is done.
1922          *
1923          * vmask_to_nmask accounts for vattr bits set on create
1924          *      [do_rfs4_set_attrs() only sets resp bits for
1925          *       non-vattr/vfs bits.]
1926          * Mask off any bits set by default so as not to return
1927          * more attrset bits than were requested in createattrs
1928          */
1929         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1930         resp->attrset &= args->createattrs.attrmask;
1931         nfs4_ntov_table_free(&ntov, &sarg);
1932 
1933         error = makefh4(&cs->fh, vp, cs->exi);
1934         if (error) {
1935                 *cs->statusp = resp->status = puterrno4(error);
1936         }
1937 
1938         /*
1939          * The cinfo.atomic = TRUE only if we got no errors, we have
1940          * non-zero va_seq's, and it has incremented by exactly one
1941          * during the creation and it didn't change during the VOP_LOOKUP
1942          * or VOP_FSYNC.
1943          */
1944         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1945             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1946                 resp->cinfo.atomic = TRUE;
1947         else
1948                 resp->cinfo.atomic = FALSE;
1949 
1950         /*
1951          * Force modified metadata out to stable storage.
1952          *
1953          * if a underlying vp exists, pass it to VOP_FSYNC
1954          */
1955         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1956                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1957         else
1958                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1959 
1960         if (resp->status != NFS4_OK) {
1961                 VN_RELE(vp);
1962                 goto out;
1963         }
1964         if (cs->vp)
1965                 VN_RELE(cs->vp);
1966 
1967         cs->vp = vp;
1968         *cs->statusp = resp->status = NFS4_OK;
1969 out:
1970         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1971             CREATE4res *, resp);
1972 }
1973 
1974 /*ARGSUSED*/
1975 static void
1976 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1977     struct compound_state *cs)
1978 {
1979         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1980             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1981 
1982         rfs4_op_inval(argop, resop, req, cs);
1983 
1984         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1985             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1986 }
1987 
1988 /*ARGSUSED*/
1989 static void
1990 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1991     struct compound_state *cs)
1992 {
1993         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1994         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1995         rfs4_deleg_state_t *dsp;
1996         nfsstat4 status;
1997 
1998         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1999             DELEGRETURN4args *, args);
2000 
2001         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2002         resp->status = *cs->statusp = status;
2003         if (status != NFS4_OK)
2004                 goto out;
2005 
2006         /* Ensure specified filehandle matches */
2007         if (cs->vp != dsp->rds_finfo->rf_vp) {
2008                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2009         } else
2010                 rfs4_return_deleg(dsp, FALSE);
2011 
2012         rfs4_update_lease(dsp->rds_client);
2013 
2014         rfs4_deleg_state_rele(dsp);
2015 out:
2016         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2017             DELEGRETURN4res *, resp);
2018 }
2019 
2020 /*
2021  * Check to see if a given "flavor" is an explicitly shared flavor.
2022  * The assumption of this routine is the "flavor" is already a valid
2023  * flavor in the secinfo list of "exi".
2024  *
2025  *      e.g.
2026  *              # share -o sec=flavor1 /export
2027  *              # share -o sec=flavor2 /export/home
2028  *
2029  *              flavor2 is not an explicitly shared flavor for /export,
2030  *              however it is in the secinfo list for /export thru the
2031  *              server namespace setup.
2032  */
2033 int
2034 is_exported_sec(int flavor, struct exportinfo *exi)
2035 {
2036         int     i;
2037         struct secinfo *sp;
2038 
2039         sp = exi->exi_export.ex_secinfo;
2040         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2041                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2042                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2043                         return (SEC_REF_EXPORTED(&sp[i]));
2044                 }
2045         }
2046 
2047         /* Should not reach this point based on the assumption */
2048         return (0);
2049 }
2050 
2051 /*
2052  * Check if the security flavor used in the request matches what is
2053  * required at the export point or at the root pseudo node (exi_root).
2054  *
2055  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2056  *
2057  */
2058 static int
2059 secinfo_match_or_authnone(struct compound_state *cs)
2060 {
2061         int     i;
2062         struct secinfo *sp;
2063 
2064         /*
2065          * Check cs->nfsflavor (from the request) against
2066          * the current export data in cs->exi.
2067          */
2068         sp = cs->exi->exi_export.ex_secinfo;
2069         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2070                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2071                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2072                         return (1);
2073         }
2074 
2075         return (0);
2076 }
2077 
2078 /*
2079  * Check the access authority for the client and return the correct error.
2080  */
2081 nfsstat4
2082 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2083 {
2084         int     authres;
2085 
2086         /*
2087          * First, check if the security flavor used in the request
2088          * are among the flavors set in the server namespace.
2089          */
2090         if (!secinfo_match_or_authnone(cs)) {
2091                 *cs->statusp = NFS4ERR_WRONGSEC;
2092                 return (*cs->statusp);
2093         }
2094 
2095         authres = checkauth4(cs, req);
2096 
2097         if (authres > 0) {
2098                 *cs->statusp = NFS4_OK;
2099                 if (! (cs->access & CS_ACCESS_LIMITED))
2100                         cs->access = CS_ACCESS_OK;
2101         } else if (authres == 0) {
2102                 *cs->statusp = NFS4ERR_ACCESS;
2103         } else if (authres == -2) {
2104                 *cs->statusp = NFS4ERR_WRONGSEC;
2105         } else {
2106                 *cs->statusp = NFS4ERR_DELAY;
2107         }
2108         return (*cs->statusp);
2109 }
2110 
2111 /*
2112  * bitmap4_to_attrmask is called by getattr and readdir.
2113  * It sets up the vattr mask and determines whether vfsstat call is needed
2114  * based on the input bitmap.
2115  * Returns nfsv4 status.
2116  */
2117 static nfsstat4
2118 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2119 {
2120         int i;
2121         uint_t  va_mask;
2122         struct statvfs64 *sbp = sargp->sbp;
2123 
2124         sargp->sbp = NULL;
2125         sargp->flag = 0;
2126         sargp->rdattr_error = NFS4_OK;
2127         sargp->mntdfid_set = FALSE;
2128         if (sargp->cs->vp)
2129                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2130                     FH4_ATTRDIR | FH4_NAMEDATTR);
2131         else
2132                 sargp->xattr = 0;
2133 
2134         /*
2135          * Set rdattr_error_req to true if return error per
2136          * failed entry rather than fail the readdir.
2137          */
2138         if (breq & FATTR4_RDATTR_ERROR_MASK)
2139                 sargp->rdattr_error_req = 1;
2140         else
2141                 sargp->rdattr_error_req = 0;
2142 
2143         /*
2144          * generate the va_mask
2145          * Handle the easy cases first
2146          */
2147         switch (breq) {
2148         case NFS4_NTOV_ATTR_MASK:
2149                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2150                 return (NFS4_OK);
2151 
2152         case NFS4_FS_ATTR_MASK:
2153                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2154                 sargp->sbp = sbp;
2155                 return (NFS4_OK);
2156 
2157         case NFS4_NTOV_ATTR_CACHE_MASK:
2158                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2159                 return (NFS4_OK);
2160 
2161         case FATTR4_LEASE_TIME_MASK:
2162                 sargp->vap->va_mask = 0;
2163                 return (NFS4_OK);
2164 
2165         default:
2166                 va_mask = 0;
2167                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2168                         if ((breq & nfs4_ntov_map[i].fbit) &&
2169                             nfs4_ntov_map[i].vbit)
2170                                 va_mask |= nfs4_ntov_map[i].vbit;
2171                 }
2172 
2173                 /*
2174                  * Check is vfsstat is needed
2175                  */
2176                 if (breq & NFS4_FS_ATTR_MASK)
2177                         sargp->sbp = sbp;
2178 
2179                 sargp->vap->va_mask = va_mask;
2180                 return (NFS4_OK);
2181         }
2182         /* NOTREACHED */
2183 }
2184 
2185 /*
2186  * bitmap4_get_sysattrs is called by getattr and readdir.
2187  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2188  * Returns nfsv4 status.
2189  */
2190 static nfsstat4
2191 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2192 {
2193         int error;
2194         struct compound_state *cs = sargp->cs;
2195         vnode_t *vp = cs->vp;
2196 
2197         if (sargp->sbp != NULL) {
2198                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2199                         sargp->sbp = NULL;   /* to identify error */
2200                         return (puterrno4(error));
2201                 }
2202         }
2203 
2204         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2205 }
2206 
2207 static void
2208 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2209 {
2210         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2211             KM_SLEEP);
2212         ntovp->attrcnt = 0;
2213         ntovp->vfsstat = FALSE;
2214 }
2215 
2216 static void
2217 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2218     struct nfs4_svgetit_arg *sargp)
2219 {
2220         int i;
2221         union nfs4_attr_u *na;
2222         uint8_t *amap;
2223 
2224         /*
2225          * XXX Should do the same checks for whether the bit is set
2226          */
2227         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2228             i < ntovp->attrcnt; i++, na++, amap++) {
2229                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2230                     NFS4ATTR_FREEIT, sargp, na);
2231         }
2232         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2233                 /*
2234                  * xdr_free for getattr will be done later
2235                  */
2236                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2237                     i < ntovp->attrcnt; i++, na++, amap++) {
2238                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2239                 }
2240         }
2241         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2242 }
2243 
2244 /*
2245  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2246  */
2247 static nfsstat4
2248 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2249     struct nfs4_svgetit_arg *sargp)
2250 {
2251         int error = 0;
2252         int i, k;
2253         struct nfs4_ntov_table ntov;
2254         XDR xdr;
2255         ulong_t xdr_size;
2256         char *xdr_attrs;
2257         nfsstat4 status = NFS4_OK;
2258         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2259         union nfs4_attr_u *na;
2260         uint8_t *amap;
2261 
2262         sargp->op = NFS4ATTR_GETIT;
2263         sargp->flag = 0;
2264 
2265         fattrp->attrmask = 0;
2266         /* if no bits requested, then return empty fattr4 */
2267         if (breq == 0) {
2268                 fattrp->attrlist4_len = 0;
2269                 fattrp->attrlist4 = NULL;
2270                 return (NFS4_OK);
2271         }
2272 
2273         /*
2274          * return NFS4ERR_INVAL when client requests write-only attrs
2275          */
2276         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2277                 return (NFS4ERR_INVAL);
2278 
2279         nfs4_ntov_table_init(&ntov);
2280         na = ntov.na;
2281         amap = ntov.amap;
2282 
2283         /*
2284          * Now loop to get or verify the attrs
2285          */
2286         for (i = 0; i < nfs4_ntov_map_size; i++) {
2287                 if (breq & nfs4_ntov_map[i].fbit) {
2288                         if ((*nfs4_ntov_map[i].sv_getit)(
2289                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2290 
2291                                 error = (*nfs4_ntov_map[i].sv_getit)(
2292                                     NFS4ATTR_GETIT, sargp, na);
2293 
2294                                 /*
2295                                  * Possible error values:
2296                                  * >0 if sv_getit failed to
2297                                  * get the attr; 0 if succeeded;
2298                                  * <0 if rdattr_error and the
2299                                  * attribute cannot be returned.
2300                                  */
2301                                 if (error && !(sargp->rdattr_error_req))
2302                                         goto done;
2303                                 /*
2304                                  * If error then just for entry
2305                                  */
2306                                 if (error == 0) {
2307                                         fattrp->attrmask |=
2308                                             nfs4_ntov_map[i].fbit;
2309                                         *amap++ =
2310                                             (uint8_t)nfs4_ntov_map[i].nval;
2311                                         na++;
2312                                         (ntov.attrcnt)++;
2313                                 } else if ((error > 0) &&
2314                                     (sargp->rdattr_error == NFS4_OK)) {
2315                                         sargp->rdattr_error = puterrno4(error);
2316                                 }
2317                                 error = 0;
2318                         }
2319                 }
2320         }
2321 
2322         /*
2323          * If rdattr_error was set after the return value for it was assigned,
2324          * update it.
2325          */
2326         if (prev_rdattr_error != sargp->rdattr_error) {
2327                 na = ntov.na;
2328                 amap = ntov.amap;
2329                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2330                         k = *amap;
2331                         if (k < FATTR4_RDATTR_ERROR) {
2332                                 continue;
2333                         }
2334                         if ((k == FATTR4_RDATTR_ERROR) &&
2335                             ((*nfs4_ntov_map[k].sv_getit)(
2336                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2337 
2338                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2339                                     NFS4ATTR_GETIT, sargp, na);
2340                         }
2341                         break;
2342                 }
2343         }
2344 
2345         xdr_size = 0;
2346         na = ntov.na;
2347         amap = ntov.amap;
2348         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2349                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2350         }
2351 
2352         fattrp->attrlist4_len = xdr_size;
2353         if (xdr_size) {
2354                 /* freed by rfs4_op_getattr_free() */
2355                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2356 
2357                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2358 
2359                 na = ntov.na;
2360                 amap = ntov.amap;
2361                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2362                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2363                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2364                                     int, *amap);
2365                                 status = NFS4ERR_SERVERFAULT;
2366                                 break;
2367                         }
2368                 }
2369                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2370         } else {
2371                 fattrp->attrlist4 = NULL;
2372         }
2373 done:
2374 
2375         nfs4_ntov_table_free(&ntov, sargp);
2376 
2377         if (error != 0)
2378                 status = puterrno4(error);
2379 
2380         return (status);
2381 }
2382 
2383 /* ARGSUSED */
2384 static void
2385 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2386     struct compound_state *cs)
2387 {
2388         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2389         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2390         struct nfs4_svgetit_arg sarg;
2391         struct statvfs64 sb;
2392         nfsstat4 status;
2393 
2394         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2395             GETATTR4args *, args);
2396 
2397         if (cs->vp == NULL) {
2398                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2399                 goto out;
2400         }
2401 
2402         if (cs->access == CS_ACCESS_DENIED) {
2403                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2404                 goto out;
2405         }
2406 
2407         sarg.sbp = &sb;
2408         sarg.cs = cs;
2409         sarg.is_referral = B_FALSE;
2410 
2411         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2412         if (status == NFS4_OK) {
2413 
2414                 status = bitmap4_get_sysattrs(&sarg);
2415                 if (status == NFS4_OK) {
2416 
2417                         /* Is this a referral? */
2418                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2419                                 /* Older V4 Solaris client sees a link */
2420                                 if (client_is_downrev(req))
2421                                         sarg.vap->va_type = VLNK;
2422                                 else
2423                                         sarg.is_referral = B_TRUE;
2424                         }
2425 
2426                         status = do_rfs4_op_getattr(args->attr_request,
2427                             &resp->obj_attributes, &sarg);
2428                 }
2429         }
2430         *cs->statusp = resp->status = status;
2431 out:
2432         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2433             GETATTR4res *, resp);
2434 }
2435 
2436 static void
2437 rfs4_op_getattr_free(nfs_resop4 *resop)
2438 {
2439         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2440 
2441         nfs4_fattr4_free(&resp->obj_attributes);
2442 }
2443 
2444 /* ARGSUSED */
2445 static void
2446 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2447     struct compound_state *cs)
2448 {
2449         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2450 
2451         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2452 
2453         if (cs->vp == NULL) {
2454                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2455                 goto out;
2456         }
2457         if (cs->access == CS_ACCESS_DENIED) {
2458                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2459                 goto out;
2460         }
2461 
2462         /* check for reparse point at the share point */
2463         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2464                 /* it's all bad */
2465                 cs->exi->exi_moved = 1;
2466                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2467                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2468                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2469                 return;
2470         }
2471 
2472         /* check for reparse point at vp */
2473         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2474                 /* it's not all bad */
2475                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2476                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2477                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2478                 return;
2479         }
2480 
2481         resp->object.nfs_fh4_val =
2482             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2483         nfs_fh4_copy(&cs->fh, &resp->object);
2484         *cs->statusp = resp->status = NFS4_OK;
2485 out:
2486         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2487             GETFH4res *, resp);
2488 }
2489 
2490 static void
2491 rfs4_op_getfh_free(nfs_resop4 *resop)
2492 {
2493         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2494 
2495         if (resp->status == NFS4_OK &&
2496             resp->object.nfs_fh4_val != NULL) {
2497                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2498                 resp->object.nfs_fh4_val = NULL;
2499                 resp->object.nfs_fh4_len = 0;
2500         }
2501 }
2502 
2503 /*
2504  * illegal: args: void
2505  *          res : status (NFS4ERR_OP_ILLEGAL)
2506  */
2507 /* ARGSUSED */
2508 static void
2509 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2510     struct svc_req *req, struct compound_state *cs)
2511 {
2512         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2513 
2514         resop->resop = OP_ILLEGAL;
2515         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2516 }
2517 
2518 /*
2519  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2520  *       res: status. If success - CURRENT_FH unchanged, return change_info
2521  */
2522 /* ARGSUSED */
2523 static void
2524 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2525     struct compound_state *cs)
2526 {
2527         LINK4args *args = &argop->nfs_argop4_u.oplink;
2528         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2529         int error;
2530         vnode_t *vp;
2531         vnode_t *dvp;
2532         struct vattr bdva, idva, adva;
2533         char *nm;
2534         uint_t  len;
2535         struct sockaddr *ca;
2536         char *name = NULL;
2537         nfsstat4 status;
2538 
2539         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2540             LINK4args *, args);
2541 
2542         /* SAVED_FH: source object */
2543         vp = cs->saved_vp;
2544         if (vp == NULL) {
2545                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2546                 goto out;
2547         }
2548 
2549         /* CURRENT_FH: target directory */
2550         dvp = cs->vp;
2551         if (dvp == NULL) {
2552                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2553                 goto out;
2554         }
2555 
2556         /*
2557          * If there is a non-shared filesystem mounted on this vnode,
2558          * do not allow to link any file in this directory.
2559          */
2560         if (vn_ismntpt(dvp)) {
2561                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2562                 goto out;
2563         }
2564 
2565         if (cs->access == CS_ACCESS_DENIED) {
2566                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2567                 goto out;
2568         }
2569 
2570         /* Check source object's type validity */
2571         if (vp->v_type == VDIR) {
2572                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2573                 goto out;
2574         }
2575 
2576         /* Check target directory's type */
2577         if (dvp->v_type != VDIR) {
2578                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2579                 goto out;
2580         }
2581 
2582         if (cs->saved_exi != cs->exi) {
2583                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2584                 goto out;
2585         }
2586 
2587         status = utf8_dir_verify(&args->newname);
2588         if (status != NFS4_OK) {
2589                 *cs->statusp = resp->status = status;
2590                 goto out;
2591         }
2592 
2593         nm = utf8_to_fn(&args->newname, &len, NULL);
2594         if (nm == NULL) {
2595                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2596                 goto out;
2597         }
2598 
2599         if (len > MAXNAMELEN) {
2600                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2601                 kmem_free(nm, len);
2602                 goto out;
2603         }
2604 
2605         if (rdonly4(req, cs)) {
2606                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2607                 kmem_free(nm, len);
2608                 goto out;
2609         }
2610 
2611         /* Get "before" change value */
2612         bdva.va_mask = AT_CTIME|AT_SEQ;
2613         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2614         if (error) {
2615                 *cs->statusp = resp->status = puterrno4(error);
2616                 kmem_free(nm, len);
2617                 goto out;
2618         }
2619 
2620         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2621         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2622             MAXPATHLEN  + 1);
2623 
2624         if (name == NULL) {
2625                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2626                 kmem_free(nm, len);
2627                 goto out;
2628         }
2629 
2630         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2631 
2632         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2633 
2634         if (nm != name)
2635                 kmem_free(name, MAXPATHLEN + 1);
2636         kmem_free(nm, len);
2637 
2638         /*
2639          * Get the initial "after" sequence number, if it fails, set to zero
2640          */
2641         idva.va_mask = AT_SEQ;
2642         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2643                 idva.va_seq = 0;
2644 
2645         /*
2646          * Force modified data and metadata out to stable storage.
2647          */
2648         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2649         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2650 
2651         if (error) {
2652                 *cs->statusp = resp->status = puterrno4(error);
2653                 goto out;
2654         }
2655 
2656         /*
2657          * Get "after" change value, if it fails, simply return the
2658          * before value.
2659          */
2660         adva.va_mask = AT_CTIME|AT_SEQ;
2661         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2662                 adva.va_ctime = bdva.va_ctime;
2663                 adva.va_seq = 0;
2664         }
2665 
2666         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2667 
2668         /*
2669          * The cinfo.atomic = TRUE only if we have
2670          * non-zero va_seq's, and it has incremented by exactly one
2671          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2672          */
2673         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2674             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2675                 resp->cinfo.atomic = TRUE;
2676         else
2677                 resp->cinfo.atomic = FALSE;
2678 
2679         *cs->statusp = resp->status = NFS4_OK;
2680 out:
2681         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2682             LINK4res *, resp);
2683 }
2684 
2685 /*
2686  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2687  */
2688 
2689 /* ARGSUSED */
2690 static nfsstat4
2691 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2692 {
2693         int error;
2694         int different_export = 0;
2695         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2696         struct exportinfo *exi = NULL, *pre_exi = NULL;
2697         nfsstat4 stat;
2698         fid_t fid;
2699         int attrdir, dotdot, walk;
2700         bool_t is_newvp = FALSE;
2701 
2702         if (cs->vp->v_flag & V_XATTRDIR) {
2703                 attrdir = 1;
2704                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2705         } else {
2706                 attrdir = 0;
2707                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2708         }
2709 
2710         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2711 
2712         /*
2713          * If dotdotting, then need to check whether it's
2714          * above the root of a filesystem, or above an
2715          * export point.
2716          */
2717         if (dotdot) {
2718 
2719                 /*
2720                  * If dotdotting at the root of a filesystem, then
2721                  * need to traverse back to the mounted-on filesystem
2722                  * and do the dotdot lookup there.
2723                  */
2724                 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2725 
2726                         /*
2727                          * If at the system root, then can
2728                          * go up no further.
2729                          */
2730                         if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2731                                 return (puterrno4(ENOENT));
2732 
2733                         /*
2734                          * Traverse back to the mounted-on filesystem
2735                          */
2736                         cs->vp = untraverse(cs->vp);
2737 
2738                         /*
2739                          * Set the different_export flag so we remember
2740                          * to pick up a new exportinfo entry for
2741                          * this new filesystem.
2742                          */
2743                         different_export = 1;
2744                 } else {
2745 
2746                         /*
2747                          * If dotdotting above an export point then set
2748                          * the different_export to get new export info.
2749                          */
2750                         different_export = nfs_exported(cs->exi, cs->vp);
2751                 }
2752         }
2753 
2754         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2755             NULL, NULL, NULL);
2756         if (error)
2757                 return (puterrno4(error));
2758 
2759         /*
2760          * If the vnode is in a pseudo filesystem, check whether it is visible.
2761          *
2762          * XXX if the vnode is a symlink and it is not visible in
2763          * a pseudo filesystem, return ENOENT (not following symlink).
2764          * V4 client can not mount such symlink. This is a regression
2765          * from V2/V3.
2766          *
2767          * In the same exported filesystem, if the security flavor used
2768          * is not an explicitly shared flavor, limit the view to the visible
2769          * list entries only. This is not a WRONGSEC case because it's already
2770          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2771          */
2772         if (!different_export &&
2773             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2774             cs->access & CS_ACCESS_LIMITED)) {
2775                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2776                         VN_RELE(vp);
2777                         return (puterrno4(ENOENT));
2778                 }
2779         }
2780 
2781         /*
2782          * If it's a mountpoint, then traverse it.
2783          */
2784         if (vn_ismntpt(vp)) {
2785                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2786                 pre_tvp = vp;           /* save pre-traversed vnode     */
2787 
2788                 /*
2789                  * hold pre_tvp to counteract rele by traverse.  We will
2790                  * need pre_tvp below if checkexport4 fails
2791                  */
2792                 VN_HOLD(pre_tvp);
2793                 if ((error = traverse(&vp)) != 0) {
2794                         VN_RELE(vp);
2795                         VN_RELE(pre_tvp);
2796                         return (puterrno4(error));
2797                 }
2798                 different_export = 1;
2799         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2800                 /*
2801                  * The vfsp comparison is to handle the case where
2802                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2803                  * and NFS is unaware of local fs transistions because
2804                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2805                  * the dir and the obj returned by lookup will have different
2806                  * vfs ptrs.
2807                  */
2808                 different_export = 1;
2809         }
2810 
2811         if (different_export) {
2812 
2813                 bzero(&fid, sizeof (fid));
2814                 fid.fid_len = MAXFIDSZ;
2815                 error = vop_fid_pseudo(vp, &fid);
2816                 if (error) {
2817                         VN_RELE(vp);
2818                         if (pre_tvp)
2819                                 VN_RELE(pre_tvp);
2820                         return (puterrno4(error));
2821                 }
2822 
2823                 if (dotdot)
2824                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2825                 else
2826                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2827 
2828                 if (exi == NULL) {
2829                         if (pre_tvp) {
2830                                 /*
2831                                  * If this vnode is a mounted-on vnode,
2832                                  * but the mounted-on file system is not
2833                                  * exported, send back the filehandle for
2834                                  * the mounted-on vnode, not the root of
2835                                  * the mounted-on file system.
2836                                  */
2837                                 VN_RELE(vp);
2838                                 vp = pre_tvp;
2839                                 exi = pre_exi;
2840                         } else {
2841                                 VN_RELE(vp);
2842                                 return (puterrno4(EACCES));
2843                         }
2844                 } else if (pre_tvp) {
2845                         /* we're done with pre_tvp now. release extra hold */
2846                         VN_RELE(pre_tvp);
2847                 }
2848 
2849                 cs->exi = exi;
2850 
2851                 /*
2852                  * Now we do a checkauth4. The reason is that
2853                  * this client/user may not have access to the new
2854                  * exported file system, and if they do,
2855                  * the client/user may be mapped to a different uid.
2856                  *
2857                  * We start with a new cr, because the checkauth4 done
2858                  * in the PUT*FH operation over wrote the cred's uid,
2859                  * gid, etc, and we want the real thing before calling
2860                  * checkauth4()
2861                  */
2862                 crfree(cs->cr);
2863                 cs->cr = crdup(cs->basecr);
2864 
2865                 oldvp = cs->vp;
2866                 cs->vp = vp;
2867                 is_newvp = TRUE;
2868 
2869                 stat = call_checkauth4(cs, req);
2870                 if (stat != NFS4_OK) {
2871                         VN_RELE(cs->vp);
2872                         cs->vp = oldvp;
2873                         return (stat);
2874                 }
2875         }
2876 
2877         /*
2878          * After various NFS checks, do a label check on the path
2879          * component. The label on this path should either be the
2880          * global zone's label or a zone's label. We are only
2881          * interested in the zone's label because exported files
2882          * in global zone is accessible (though read-only) to
2883          * clients. The exportability/visibility check is already
2884          * done before reaching this code.
2885          */
2886         if (is_system_labeled()) {
2887                 bslabel_t *clabel;
2888 
2889                 ASSERT(req->rq_label != NULL);
2890                 clabel = req->rq_label;
2891                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2892                     "got client label from request(1)", struct svc_req *, req);
2893 
2894                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2895                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2896                             cs->exi)) {
2897                                 error = EACCES;
2898                                 goto err_out;
2899                         }
2900                 } else {
2901                         /*
2902                          * We grant access to admin_low label clients
2903                          * only if the client is trusted, i.e. also
2904                          * running Solaris Trusted Extension.
2905                          */
2906                         struct sockaddr *ca;
2907                         int             addr_type;
2908                         void            *ipaddr;
2909                         tsol_tpc_t      *tp;
2910 
2911                         ca = (struct sockaddr *)svc_getrpccaller(
2912                             req->rq_xprt)->buf;
2913                         if (ca->sa_family == AF_INET) {
2914                                 addr_type = IPV4_VERSION;
2915                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2916                         } else if (ca->sa_family == AF_INET6) {
2917                                 addr_type = IPV6_VERSION;
2918                                 ipaddr = &((struct sockaddr_in6 *)
2919                                     ca)->sin6_addr;
2920                         }
2921                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2922                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2923                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2924                             SUN_CIPSO) {
2925                                 if (tp != NULL)
2926                                         TPC_RELE(tp);
2927                                 error = EACCES;
2928                                 goto err_out;
2929                         }
2930                         TPC_RELE(tp);
2931                 }
2932         }
2933 
2934         error = makefh4(&cs->fh, vp, cs->exi);
2935 
2936 err_out:
2937         if (error) {
2938                 if (is_newvp) {
2939                         VN_RELE(cs->vp);
2940                         cs->vp = oldvp;
2941                 } else
2942                         VN_RELE(vp);
2943                 return (puterrno4(error));
2944         }
2945 
2946         if (!is_newvp) {
2947                 if (cs->vp)
2948                         VN_RELE(cs->vp);
2949                 cs->vp = vp;
2950         } else if (oldvp)
2951                 VN_RELE(oldvp);
2952 
2953         /*
2954          * if did lookup on attrdir and didn't lookup .., set named
2955          * attr fh flag
2956          */
2957         if (attrdir && ! dotdot)
2958                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2959 
2960         /* Assume false for now, open proc will set this */
2961         cs->mandlock = FALSE;
2962 
2963         return (NFS4_OK);
2964 }
2965 
2966 /* ARGSUSED */
2967 static void
2968 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2969     struct compound_state *cs)
2970 {
2971         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2972         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2973         char *nm;
2974         uint_t len;
2975         struct sockaddr *ca;
2976         char *name = NULL;
2977         nfsstat4 status;
2978 
2979         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2980             LOOKUP4args *, args);
2981 
2982         if (cs->vp == NULL) {
2983                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2984                 goto out;
2985         }
2986 
2987         if (cs->vp->v_type == VLNK) {
2988                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2989                 goto out;
2990         }
2991 
2992         if (cs->vp->v_type != VDIR) {
2993                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2994                 goto out;
2995         }
2996 
2997         status = utf8_dir_verify(&args->objname);
2998         if (status != NFS4_OK) {
2999                 *cs->statusp = resp->status = status;
3000                 goto out;
3001         }
3002 
3003         nm = utf8_to_str(&args->objname, &len, NULL);
3004         if (nm == NULL) {
3005                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3006                 goto out;
3007         }
3008 
3009         if (len > MAXNAMELEN) {
3010                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3011                 kmem_free(nm, len);
3012                 goto out;
3013         }
3014 
3015         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3016         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3017             MAXPATHLEN  + 1);
3018 
3019         if (name == NULL) {
3020                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3021                 kmem_free(nm, len);
3022                 goto out;
3023         }
3024 
3025         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3026 
3027         if (name != nm)
3028                 kmem_free(name, MAXPATHLEN + 1);
3029         kmem_free(nm, len);
3030 
3031 out:
3032         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3033             LOOKUP4res *, resp);
3034 }
3035 
3036 /* ARGSUSED */
3037 static void
3038 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3039     struct compound_state *cs)
3040 {
3041         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3042 
3043         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3044 
3045         if (cs->vp == NULL) {
3046                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3047                 goto out;
3048         }
3049 
3050         if (cs->vp->v_type != VDIR) {
3051                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3052                 goto out;
3053         }
3054 
3055         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3056 
3057         /*
3058          * From NFSV4 Specification, LOOKUPP should not check for
3059          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3060          */
3061         if (resp->status == NFS4ERR_WRONGSEC) {
3062                 *cs->statusp = resp->status = NFS4_OK;
3063         }
3064 
3065 out:
3066         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3067             LOOKUPP4res *, resp);
3068 }
3069 
3070 
3071 /*ARGSUSED2*/
3072 static void
3073 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3074     struct compound_state *cs)
3075 {
3076         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3077         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3078         vnode_t         *avp = NULL;
3079         int             lookup_flags = LOOKUP_XATTR, error;
3080         int             exp_ro = 0;
3081 
3082         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3083             OPENATTR4args *, args);
3084 
3085         if (cs->vp == NULL) {
3086                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3087                 goto out;
3088         }
3089 
3090         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3091             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3092                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3093                 goto out;
3094         }
3095 
3096         /*
3097          * If file system supports passing ACE mask to VOP_ACCESS then
3098          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3099          */
3100 
3101         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3102                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3103                     V_ACE_MASK, cs->cr, NULL);
3104         else
3105                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3106                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3107                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3108 
3109         if (error) {
3110                 *cs->statusp = resp->status = puterrno4(EACCES);
3111                 goto out;
3112         }
3113 
3114         /*
3115          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3116          * the file system is exported read-only -- regardless of
3117          * createdir flag.  Otherwise the attrdir would be created
3118          * (assuming server fs isn't mounted readonly locally).  If
3119          * VOP_LOOKUP returns ENOENT in this case, the error will
3120          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3121          * because specfs has no VOP_LOOKUP op, so the macro would
3122          * return ENOSYS.  EINVAL is returned by all (current)
3123          * Solaris file system implementations when any of their
3124          * restrictions are violated (xattr(dir) can't have xattrdir).
3125          * Returning NOTSUPP is more appropriate in this case
3126          * because the object will never be able to have an attrdir.
3127          */
3128         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3129                 lookup_flags |= CREATE_XATTR_DIR;
3130 
3131         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3132             NULL, NULL, NULL);
3133 
3134         if (error) {
3135                 if (error == ENOENT && args->createdir && exp_ro)
3136                         *cs->statusp = resp->status = puterrno4(EROFS);
3137                 else if (error == EINVAL || error == ENOSYS)
3138                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3139                 else
3140                         *cs->statusp = resp->status = puterrno4(error);
3141                 goto out;
3142         }
3143 
3144         ASSERT(avp->v_flag & V_XATTRDIR);
3145 
3146         error = makefh4(&cs->fh, avp, cs->exi);
3147 
3148         if (error) {
3149                 VN_RELE(avp);
3150                 *cs->statusp = resp->status = puterrno4(error);
3151                 goto out;
3152         }
3153 
3154         VN_RELE(cs->vp);
3155         cs->vp = avp;
3156 
3157         /*
3158          * There is no requirement for an attrdir fh flag
3159          * because the attrdir has a vnode flag to distinguish
3160          * it from regular (non-xattr) directories.  The
3161          * FH4_ATTRDIR flag is set for future sanity checks.
3162          */
3163         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3164         *cs->statusp = resp->status = NFS4_OK;
3165 
3166 out:
3167         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3168             OPENATTR4res *, resp);
3169 }
3170 
3171 static int
3172 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3173     caller_context_t *ct)
3174 {
3175         int error;
3176         int i;
3177         clock_t delaytime;
3178 
3179         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3180 
3181         /*
3182          * Don't block on mandatory locks. If this routine returns
3183          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3184          */
3185         uio->uio_fmode = FNONBLOCK;
3186 
3187         for (i = 0; i < rfs4_maxlock_tries; i++) {
3188 
3189 
3190                 if (direction == FREAD) {
3191                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3192                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3193                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3194                 } else {
3195                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3196                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3197                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3198                 }
3199 
3200                 if (error != EAGAIN)
3201                         break;
3202 
3203                 if (i < rfs4_maxlock_tries - 1) {
3204                         delay(delaytime);
3205                         delaytime *= 2;
3206                 }
3207         }
3208 
3209         return (error);
3210 }
3211 
3212 /* ARGSUSED */
3213 static void
3214 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3215     struct compound_state *cs)
3216 {
3217         READ4args *args = &argop->nfs_argop4_u.opread;
3218         READ4res *resp = &resop->nfs_resop4_u.opread;
3219         int error;
3220         int verror;
3221         vnode_t *vp;
3222         struct vattr va;
3223         struct iovec iov, *iovp = NULL;
3224         int iovcnt;
3225         struct uio uio;
3226         u_offset_t offset;
3227         bool_t *deleg = &cs->deleg;
3228         nfsstat4 stat;
3229         int in_crit = 0;
3230         mblk_t *mp = NULL;
3231         int alloc_err = 0;
3232         int rdma_used = 0;
3233         int loaned_buffers;
3234         caller_context_t ct;
3235         struct uio *uiop;
3236 
3237         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3238             READ4args, args);
3239 
3240         vp = cs->vp;
3241         if (vp == NULL) {
3242                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3243                 goto out;
3244         }
3245         if (cs->access == CS_ACCESS_DENIED) {
3246                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3247                 goto out;
3248         }
3249 
3250         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3251             deleg, TRUE, &ct)) != NFS4_OK) {
3252                 *cs->statusp = resp->status = stat;
3253                 goto out;
3254         }
3255 
3256         /*
3257          * Enter the critical region before calling VOP_RWLOCK
3258          * to avoid a deadlock with write requests.
3259          */
3260         if (nbl_need_check(vp)) {
3261                 nbl_start_crit(vp, RW_READER);
3262                 in_crit = 1;
3263                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3264                     &ct)) {
3265                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3266                         goto out;
3267                 }
3268         }
3269 
3270         if (args->wlist) {
3271                 if (args->count > clist_len(args->wlist)) {
3272                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3273                         goto out;
3274                 }
3275                 rdma_used = 1;
3276         }
3277 
3278         /* use loaned buffers for TCP */
3279         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3280 
3281         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3282         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3283 
3284         /*
3285          * If we can't get the attributes, then we can't do the
3286          * right access checking.  So, we'll fail the request.
3287          */
3288         if (verror) {
3289                 *cs->statusp = resp->status = puterrno4(verror);
3290                 goto out;
3291         }
3292 
3293         if (vp->v_type != VREG) {
3294                 *cs->statusp = resp->status =
3295                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3296                 goto out;
3297         }
3298 
3299         if (crgetuid(cs->cr) != va.va_uid &&
3300             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3301             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3302                 *cs->statusp = resp->status = puterrno4(error);
3303                 goto out;
3304         }
3305 
3306         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3307                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3308                 goto out;
3309         }
3310 
3311         offset = args->offset;
3312         if (offset >= va.va_size) {
3313                 *cs->statusp = resp->status = NFS4_OK;
3314                 resp->eof = TRUE;
3315                 resp->data_len = 0;
3316                 resp->data_val = NULL;
3317                 resp->mblk = NULL;
3318                 /* RDMA */
3319                 resp->wlist = args->wlist;
3320                 resp->wlist_len = resp->data_len;
3321                 *cs->statusp = resp->status = NFS4_OK;
3322                 if (resp->wlist)
3323                         clist_zero_len(resp->wlist);
3324                 goto out;
3325         }
3326 
3327         if (args->count == 0) {
3328                 *cs->statusp = resp->status = NFS4_OK;
3329                 resp->eof = FALSE;
3330                 resp->data_len = 0;
3331                 resp->data_val = NULL;
3332                 resp->mblk = NULL;
3333                 /* RDMA */
3334                 resp->wlist = args->wlist;
3335                 resp->wlist_len = resp->data_len;
3336                 if (resp->wlist)
3337                         clist_zero_len(resp->wlist);
3338                 goto out;
3339         }
3340 
3341         /*
3342          * Do not allocate memory more than maximum allowed
3343          * transfer size
3344          */
3345         if (args->count > rfs4_tsize(req))
3346                 args->count = rfs4_tsize(req);
3347 
3348         if (loaned_buffers) {
3349                 uiop = (uio_t *)rfs_setup_xuio(vp);
3350                 ASSERT(uiop != NULL);
3351                 uiop->uio_segflg = UIO_SYSSPACE;
3352                 uiop->uio_loffset = args->offset;
3353                 uiop->uio_resid = args->count;
3354 
3355                 /* Jump to do the read if successful */
3356                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3357                         /*
3358                          * Need to hold the vnode until after VOP_RETZCBUF()
3359                          * is called.
3360                          */
3361                         VN_HOLD(vp);
3362                         goto doio_read;
3363                 }
3364 
3365                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3366                     uiop->uio_loffset, int, uiop->uio_resid);
3367 
3368                 uiop->uio_extflg = 0;
3369 
3370                 /* failure to setup for zero copy */
3371                 rfs_free_xuio((void *)uiop);
3372                 loaned_buffers = 0;
3373         }
3374 
3375         /*
3376          * If returning data via RDMA Write, then grab the chunk list. If we
3377          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3378          */
3379         if (rdma_used) {
3380                 mp = NULL;
3381                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3382                 uio.uio_iov = &iov;
3383                 uio.uio_iovcnt = 1;
3384         } else {
3385                 /*
3386                  * mp will contain the data to be sent out in the read reply.
3387                  * It will be freed after the reply has been sent.
3388                  */
3389                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3390                 ASSERT(mp != NULL);
3391                 ASSERT(alloc_err == 0);
3392                 uio.uio_iov = iovp;
3393                 uio.uio_iovcnt = iovcnt;
3394         }
3395 
3396         uio.uio_segflg = UIO_SYSSPACE;
3397         uio.uio_extflg = UIO_COPY_CACHED;
3398         uio.uio_loffset = args->offset;
3399         uio.uio_resid = args->count;
3400         uiop = &uio;
3401 
3402 doio_read:
3403         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3404 
3405         va.va_mask = AT_SIZE;
3406         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3407 
3408         if (error) {
3409                 if (mp)
3410                         freemsg(mp);
3411                 *cs->statusp = resp->status = puterrno4(error);
3412                 goto out;
3413         }
3414 
3415         /* make mblk using zc buffers */
3416         if (loaned_buffers) {
3417                 mp = uio_to_mblk(uiop);
3418                 ASSERT(mp != NULL);
3419         }
3420 
3421         *cs->statusp = resp->status = NFS4_OK;
3422 
3423         ASSERT(uiop->uio_resid >= 0);
3424         resp->data_len = args->count - uiop->uio_resid;
3425         if (mp) {
3426                 resp->data_val = (char *)mp->b_datap->db_base;
3427                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3428         } else {
3429                 resp->data_val = (caddr_t)iov.iov_base;
3430         }
3431 
3432         resp->mblk = mp;
3433 
3434         if (!verror && offset + resp->data_len == va.va_size)
3435                 resp->eof = TRUE;
3436         else
3437                 resp->eof = FALSE;
3438 
3439         if (rdma_used) {
3440                 if (!rdma_setup_read_data4(args, resp)) {
3441                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3442                 }
3443         } else {
3444                 resp->wlist = NULL;
3445         }
3446 
3447 out:
3448         if (in_crit)
3449                 nbl_end_crit(vp);
3450 
3451         if (iovp != NULL)
3452                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3453 
3454         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3455             READ4res *, resp);
3456 }
3457 
3458 static void
3459 rfs4_op_read_free(nfs_resop4 *resop)
3460 {
3461         READ4res        *resp = &resop->nfs_resop4_u.opread;
3462 
3463         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3464                 freemsg(resp->mblk);
3465                 resp->mblk = NULL;
3466                 resp->data_val = NULL;
3467                 resp->data_len = 0;
3468         }
3469 }
3470 
3471 static void
3472 rfs4_op_readdir_free(nfs_resop4 * resop)
3473 {
3474         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3475 
3476         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3477                 freeb(resp->mblk);
3478                 resp->mblk = NULL;
3479                 resp->data_len = 0;
3480         }
3481 }
3482 
3483 
3484 /* ARGSUSED */
3485 static void
3486 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3487     struct compound_state *cs)
3488 {
3489         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3490         int             error;
3491         vnode_t         *vp;
3492         struct exportinfo *exi, *sav_exi;
3493         nfs_fh4_fmt_t   *fh_fmtp;
3494         nfs_export_t *ne = nfs_get_export();
3495 
3496         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3497 
3498         if (cs->vp) {
3499                 VN_RELE(cs->vp);
3500                 cs->vp = NULL;
3501         }
3502 
3503         if (cs->cr)
3504                 crfree(cs->cr);
3505 
3506         cs->cr = crdup(cs->basecr);
3507 
3508         vp = ne->exi_public->exi_vp;
3509         if (vp == NULL) {
3510                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3511                 goto out;
3512         }
3513 
3514         error = makefh4(&cs->fh, vp, ne->exi_public);
3515         if (error != 0) {
3516                 *cs->statusp = resp->status = puterrno4(error);
3517                 goto out;
3518         }
3519         sav_exi = cs->exi;
3520         if (ne->exi_public == ne->exi_root) {
3521                 /*
3522                  * No filesystem is actually shared public, so we default
3523                  * to exi_root. In this case, we must check whether root
3524                  * is exported.
3525                  */
3526                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3527 
3528                 /*
3529                  * if root filesystem is exported, the exportinfo struct that we
3530                  * should use is what checkexport4 returns, because root_exi is
3531                  * actually a mostly empty struct.
3532                  */
3533                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3534                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3535                 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3536         } else {
3537                 /*
3538                  * it's a properly shared filesystem
3539                  */
3540                 cs->exi = ne->exi_public;
3541         }
3542 
3543         if (is_system_labeled()) {
3544                 bslabel_t *clabel;
3545 
3546                 ASSERT(req->rq_label != NULL);
3547                 clabel = req->rq_label;
3548                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3549                     "got client label from request(1)",
3550                     struct svc_req *, req);
3551                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3552                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3553                             cs->exi)) {
3554                                 *cs->statusp = resp->status =
3555                                     NFS4ERR_SERVERFAULT;
3556                                 goto out;
3557                         }
3558                 }
3559         }
3560 
3561         VN_HOLD(vp);
3562         cs->vp = vp;
3563 
3564         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3565                 VN_RELE(cs->vp);
3566                 cs->vp = NULL;
3567                 cs->exi = sav_exi;
3568                 goto out;
3569         }
3570 
3571         *cs->statusp = resp->status = NFS4_OK;
3572 out:
3573         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3574             PUTPUBFH4res *, resp);
3575 }
3576 
3577 /*
3578  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3579  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3580  * or joe have restrictive search permissions, then we shouldn't let
3581  * the client get a file handle. This is easy to enforce. However, we
3582  * don't know what security flavor should be used until we resolve the
3583  * path name. Another complication is uid mapping. If root is
3584  * the user, then it will be mapped to the anonymous user by default,
3585  * but we won't know that till we've resolved the path name. And we won't
3586  * know what the anonymous user is.
3587  * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor of
3589  * the target object matches that of the request, and if root was the
3590  * caller, check for the root= and anon= options, and if necessary,
3591  * repeat the lookup using the right cred_t. But that's not done yet.
3592  */
3593 /* ARGSUSED */
3594 static void
3595 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3596     struct compound_state *cs)
3597 {
3598         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3599         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3600         nfs_fh4_fmt_t *fh_fmtp;
3601 
3602         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3603             PUTFH4args *, args);
3604 
3605         if (cs->vp) {
3606                 VN_RELE(cs->vp);
3607                 cs->vp = NULL;
3608         }
3609 
3610         if (cs->cr) {
3611                 crfree(cs->cr);
3612                 cs->cr = NULL;
3613         }
3614 
3615 
3616         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3617                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3618                 goto out;
3619         }
3620 
3621         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3622         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3623             NULL);
3624 
3625         if (cs->exi == NULL) {
3626                 *cs->statusp = resp->status = NFS4ERR_STALE;
3627                 goto out;
3628         }
3629 
3630         cs->cr = crdup(cs->basecr);
3631 
3632         ASSERT(cs->cr != NULL);
3633 
3634         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3635                 *cs->statusp = resp->status;
3636                 goto out;
3637         }
3638 
3639         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3640                 VN_RELE(cs->vp);
3641                 cs->vp = NULL;
3642                 goto out;
3643         }
3644 
3645         nfs_fh4_copy(&args->object, &cs->fh);
3646         *cs->statusp = resp->status = NFS4_OK;
3647         cs->deleg = FALSE;
3648 
3649 out:
3650         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3651             PUTFH4res *, resp);
3652 }
3653 
3654 /* ARGSUSED */
3655 static void
3656 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3657     struct compound_state *cs)
3658 {
3659         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3660         int error;
3661         fid_t fid;
3662         struct exportinfo *exi, *sav_exi;
3663 
3664         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3665 
3666         if (cs->vp) {
3667                 VN_RELE(cs->vp);
3668                 cs->vp = NULL;
3669         }
3670 
3671         if (cs->cr)
3672                 crfree(cs->cr);
3673 
3674         cs->cr = crdup(cs->basecr);
3675 
3676         /*
3677          * Using rootdir, the system root vnode,
3678          * get its fid.
3679          */
3680         bzero(&fid, sizeof (fid));
3681         fid.fid_len = MAXFIDSZ;
3682         error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3683         if (error != 0) {
3684                 *cs->statusp = resp->status = puterrno4(error);
3685                 goto out;
3686         }
3687 
3688         /*
3689          * Then use the root fsid & fid it to find out if it's exported
3690          *
3691          * If the server root isn't exported directly, then
3692          * it should at least be a pseudo export based on
3693          * one or more exports further down in the server's
3694          * file tree.
3695          */
3696         exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3697         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3698                 NFS4_DEBUG(rfs4_debug,
3699                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3700                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3701                 goto out;
3702         }
3703 
3704         /*
3705          * Now make a filehandle based on the root
3706          * export and root vnode.
3707          */
3708         error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3709         if (error != 0) {
3710                 *cs->statusp = resp->status = puterrno4(error);
3711                 goto out;
3712         }
3713 
3714         sav_exi = cs->exi;
3715         cs->exi = exi;
3716 
3717         VN_HOLD(ZONE_ROOTVP());
3718         cs->vp = ZONE_ROOTVP();
3719 
3720         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3721                 VN_RELE(cs->vp);
3722                 cs->vp = NULL;
3723                 cs->exi = sav_exi;
3724                 goto out;
3725         }
3726 
3727         *cs->statusp = resp->status = NFS4_OK;
3728         cs->deleg = FALSE;
3729 out:
3730         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3731             PUTROOTFH4res *, resp);
3732 }
3733 
3734 /*
3735  * readlink: args: CURRENT_FH.
3736  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3737  */
3738 
3739 /* ARGSUSED */
3740 static void
3741 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3742     struct compound_state *cs)
3743 {
3744         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3745         int error;
3746         vnode_t *vp;
3747         struct iovec iov;
3748         struct vattr va;
3749         struct uio uio;
3750         char *data;
3751         struct sockaddr *ca;
3752         char *name = NULL;
3753         int is_referral;
3754 
3755         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3756 
3757         /* CURRENT_FH: directory */
3758         vp = cs->vp;
3759         if (vp == NULL) {
3760                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3761                 goto out;
3762         }
3763 
3764         if (cs->access == CS_ACCESS_DENIED) {
3765                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3766                 goto out;
3767         }
3768 
3769         /* Is it a referral? */
3770         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3771 
3772                 is_referral = 1;
3773 
3774         } else {
3775 
3776                 is_referral = 0;
3777 
3778                 if (vp->v_type == VDIR) {
3779                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3780                         goto out;
3781                 }
3782 
3783                 if (vp->v_type != VLNK) {
3784                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3785                         goto out;
3786                 }
3787 
3788         }
3789 
3790         va.va_mask = AT_MODE;
3791         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3792         if (error) {
3793                 *cs->statusp = resp->status = puterrno4(error);
3794                 goto out;
3795         }
3796 
3797         if (MANDLOCK(vp, va.va_mode)) {
3798                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3799                 goto out;
3800         }
3801 
3802         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3803 
3804         if (is_referral) {
3805                 char *s;
3806                 size_t strsz;
3807 
3808                 /* Get an artificial symlink based on a referral */
3809                 s = build_symlink(vp, cs->cr, &strsz);
3810                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3811                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3812                     vnode_t *, vp, char *, s);
3813                 if (s == NULL)
3814                         error = EINVAL;
3815                 else {
3816                         error = 0;
3817                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3818                         kmem_free(s, strsz);
3819                 }
3820 
3821         } else {
3822 
3823                 iov.iov_base = data;
3824                 iov.iov_len = MAXPATHLEN;
3825                 uio.uio_iov = &iov;
3826                 uio.uio_iovcnt = 1;
3827                 uio.uio_segflg = UIO_SYSSPACE;
3828                 uio.uio_extflg = UIO_COPY_CACHED;
3829                 uio.uio_loffset = 0;
3830                 uio.uio_resid = MAXPATHLEN;
3831 
3832                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3833 
3834                 if (!error)
3835                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3836         }
3837 
3838         if (error) {
3839                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3840                 *cs->statusp = resp->status = puterrno4(error);
3841                 goto out;
3842         }
3843 
3844         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3845         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3846             MAXPATHLEN  + 1);
3847 
3848         if (name == NULL) {
3849                 /*
3850                  * Even though the conversion failed, we return
3851                  * something. We just don't translate it.
3852                  */
3853                 name = data;
3854         }
3855 
3856         /*
3857          * treat link name as data
3858          */
3859         (void) str_to_utf8(name, (utf8string *)&resp->link);
3860 
3861         if (name != data)
3862                 kmem_free(name, MAXPATHLEN + 1);
3863         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3864         *cs->statusp = resp->status = NFS4_OK;
3865 
3866 out:
3867         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3868             READLINK4res *, resp);
3869 }
3870 
3871 static void
3872 rfs4_op_readlink_free(nfs_resop4 *resop)
3873 {
3874         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3875         utf8string *symlink = (utf8string *)&resp->link;
3876 
3877         if (symlink->utf8string_val) {
3878                 UTF8STRING_FREE(*symlink)
3879         }
3880 }
3881 
3882 /*
3883  * release_lockowner:
3884  *      Release any state associated with the supplied
3885  *      lockowner. Note if any lo_state is holding locks we will not
3886  *      rele that lo_state and thus the lockowner will not be destroyed.
3887  *      A client using lock after the lock owner stateid has been released
3888  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3889  *      to reissue the lock with new_lock_owner set to TRUE.
3890  *      args: lock_owner
3891  *      res:  status
3892  */
3893 /* ARGSUSED */
3894 static void
3895 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3896     struct svc_req *req, struct compound_state *cs)
3897 {
3898         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3899         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3900         rfs4_lockowner_t *lo;
3901         rfs4_openowner_t *oo;
3902         rfs4_state_t *sp;
3903         rfs4_lo_state_t *lsp;
3904         rfs4_client_t *cp;
3905         bool_t create = FALSE;
3906         locklist_t *llist;
3907         sysid_t sysid;
3908 
3909         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3910             cs, RELEASE_LOCKOWNER4args *, ap);
3911 
3912         /* Make sure there is a clientid around for this request */
3913         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3914 
3915         if (cp == NULL) {
3916                 *cs->statusp = resp->status =
3917                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3918                 goto out;
3919         }
3920         rfs4_client_rele(cp);
3921 
3922         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3923         if (lo == NULL) {
3924                 *cs->statusp = resp->status = NFS4_OK;
3925                 goto out;
3926         }
3927         ASSERT(lo->rl_client != NULL);
3928 
3929         /*
3930          * Check for EXPIRED client. If so will reap state with in a lease
3931          * period or on next set_clientid_confirm step
3932          */
3933         if (rfs4_lease_expired(lo->rl_client)) {
3934                 rfs4_lockowner_rele(lo);
3935                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3936                 goto out;
3937         }
3938 
3939         /*
3940          * If no sysid has been assigned, then no locks exist; just return.
3941          */
3942         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3943         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3944                 rfs4_lockowner_rele(lo);
3945                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3946                 goto out;
3947         }
3948 
3949         sysid = lo->rl_client->rc_sysidt;
3950         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3951 
3952         /*
3953          * Mark the lockowner invalid.
3954          */
3955         rfs4_dbe_hide(lo->rl_dbe);
3956 
3957         /*
3958          * sysid-pid pair should now not be used since the lockowner is
3959          * invalid. If the client were to instantiate the lockowner again
3960          * it would be assigned a new pid. Thus we can get the list of
3961          * current locks.
3962          */
3963 
3964         llist = flk_get_active_locks(sysid, lo->rl_pid);
3965         /* If we are still holding locks fail */
3966         if (llist != NULL) {
3967 
3968                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3969 
3970                 flk_free_locklist(llist);
3971                 /*
3972                  * We need to unhide the lockowner so the client can
3973                  * try it again. The bad thing here is if the client
3974                  * has a logic error that took it here in the first place
3975                  * they probably have lost accounting of the locks that it
3976                  * is holding. So we may have dangling state until the
3977                  * open owner state is reaped via close. One scenario
3978                  * that could possibly occur is that the client has
3979                  * sent the unlock request(s) in separate threads
3980                  * and has not waited for the replies before sending the
3981                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3982                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3983                  * reissuing the request.
3984                  */
3985                 rfs4_dbe_unhide(lo->rl_dbe);
3986                 rfs4_lockowner_rele(lo);
3987                 goto out;
3988         }
3989 
3990         /*
3991          * For the corresponding client we need to check each open
3992          * owner for any opens that have lockowner state associated
3993          * with this lockowner.
3994          */
3995 
3996         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3997         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3998             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3999 
4000                 rfs4_dbe_lock(oo->ro_dbe);
4001                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4002                     sp = list_next(&oo->ro_statelist, sp)) {
4003 
4004                         rfs4_dbe_lock(sp->rs_dbe);
4005                         for (lsp = list_head(&sp->rs_lostatelist);
4006                             lsp != NULL;
4007                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4008                                 if (lsp->rls_locker == lo) {
4009                                         rfs4_dbe_lock(lsp->rls_dbe);
4010                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4011                                         rfs4_dbe_unlock(lsp->rls_dbe);
4012                                 }
4013                         }
4014                         rfs4_dbe_unlock(sp->rs_dbe);
4015                 }
4016                 rfs4_dbe_unlock(oo->ro_dbe);
4017         }
4018         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4019 
4020         rfs4_lockowner_rele(lo);
4021 
4022         *cs->statusp = resp->status = NFS4_OK;
4023 
4024 out:
4025         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4026             cs, RELEASE_LOCKOWNER4res *, resp);
4027 }
4028 
4029 /*
4030  * short utility function to lookup a file and recall the delegation
4031  */
4032 static rfs4_file_t *
4033 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4034     int *lkup_error, cred_t *cr)
4035 {
4036         vnode_t *vp;
4037         rfs4_file_t *fp = NULL;
4038         bool_t fcreate = FALSE;
4039         int error;
4040 
4041         if (vpp)
4042                 *vpp = NULL;
4043 
4044         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4045             NULL)) == 0) {
4046                 if (vp->v_type == VREG)
4047                         fp = rfs4_findfile(vp, NULL, &fcreate);
4048                 if (vpp)
4049                         *vpp = vp;
4050                 else
4051                         VN_RELE(vp);
4052         }
4053 
4054         if (lkup_error)
4055                 *lkup_error = error;
4056 
4057         return (fp);
4058 }
4059 
4060 /*
4061  * remove: args: CURRENT_FH: directory; name.
4062  *      res: status. If success - CURRENT_FH unchanged, return change_info
4063  *              for directory.
4064  */
4065 /* ARGSUSED */
4066 static void
4067 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4068     struct compound_state *cs)
4069 {
4070         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4071         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4072         int error;
4073         vnode_t *dvp, *vp;
4074         struct vattr bdva, idva, adva;
4075         char *nm;
4076         uint_t len;
4077         rfs4_file_t *fp;
4078         int in_crit = 0;
4079         bslabel_t *clabel;
4080         struct sockaddr *ca;
4081         char *name = NULL;
4082         nfsstat4 status;
4083 
4084         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4085             REMOVE4args *, args);
4086 
4087         /* CURRENT_FH: directory */
4088         dvp = cs->vp;
4089         if (dvp == NULL) {
4090                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4091                 goto out;
4092         }
4093 
4094         if (cs->access == CS_ACCESS_DENIED) {
4095                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4096                 goto out;
4097         }
4098 
4099         /*
4100          * If there is an unshared filesystem mounted on this vnode,
4101          * Do not allow to remove anything in this directory.
4102          */
4103         if (vn_ismntpt(dvp)) {
4104                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4105                 goto out;
4106         }
4107 
4108         if (dvp->v_type != VDIR) {
4109                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4110                 goto out;
4111         }
4112 
4113         status = utf8_dir_verify(&args->target);
4114         if (status != NFS4_OK) {
4115                 *cs->statusp = resp->status = status;
4116                 goto out;
4117         }
4118 
4119         /*
4120          * Lookup the file so that we can check if it's a directory
4121          */
4122         nm = utf8_to_fn(&args->target, &len, NULL);
4123         if (nm == NULL) {
4124                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4125                 goto out;
4126         }
4127 
4128         if (len > MAXNAMELEN) {
4129                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4130                 kmem_free(nm, len);
4131                 goto out;
4132         }
4133 
4134         if (rdonly4(req, cs)) {
4135                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4136                 kmem_free(nm, len);
4137                 goto out;
4138         }
4139 
4140         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4141         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4142             MAXPATHLEN  + 1);
4143 
4144         if (name == NULL) {
4145                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4146                 kmem_free(nm, len);
4147                 goto out;
4148         }
4149 
4150         /*
4151          * Lookup the file to determine type and while we are see if
4152          * there is a file struct around and check for delegation.
4153          * We don't need to acquire va_seq before this lookup, if
4154          * it causes an update, cinfo.before will not match, which will
4155          * trigger a cache flush even if atomic is TRUE.
4156          */
4157         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4158                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4159                     NULL)) {
4160                         VN_RELE(vp);
4161                         rfs4_file_rele(fp);
4162                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4163                         if (nm != name)
4164                                 kmem_free(name, MAXPATHLEN + 1);
4165                         kmem_free(nm, len);
4166                         goto out;
4167                 }
4168         }
4169 
4170         /* Didn't find anything to remove */
4171         if (vp == NULL) {
4172                 *cs->statusp = resp->status = error;
4173                 if (nm != name)
4174                         kmem_free(name, MAXPATHLEN + 1);
4175                 kmem_free(nm, len);
4176                 goto out;
4177         }
4178 
4179         if (nbl_need_check(vp)) {
4180                 nbl_start_crit(vp, RW_READER);
4181                 in_crit = 1;
4182                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4183                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4184                         if (nm != name)
4185                                 kmem_free(name, MAXPATHLEN + 1);
4186                         kmem_free(nm, len);
4187                         nbl_end_crit(vp);
4188                         VN_RELE(vp);
4189                         if (fp) {
4190                                 rfs4_clear_dont_grant(fp);
4191                                 rfs4_file_rele(fp);
4192                         }
4193                         goto out;
4194                 }
4195         }
4196 
4197         /* check label before allowing removal */
4198         if (is_system_labeled()) {
4199                 ASSERT(req->rq_label != NULL);
4200                 clabel = req->rq_label;
4201                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4202                     "got client label from request(1)",
4203                     struct svc_req *, req);
4204                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4205                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4206                             cs->exi)) {
4207                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4208                                 if (name != nm)
4209                                         kmem_free(name, MAXPATHLEN + 1);
4210                                 kmem_free(nm, len);
4211                                 if (in_crit)
4212                                         nbl_end_crit(vp);
4213                                 VN_RELE(vp);
4214                                 if (fp) {
4215                                         rfs4_clear_dont_grant(fp);
4216                                         rfs4_file_rele(fp);
4217                                 }
4218                                 goto out;
4219                         }
4220                 }
4221         }
4222 
4223         /* Get dir "before" change value */
4224         bdva.va_mask = AT_CTIME|AT_SEQ;
4225         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4226         if (error) {
4227                 *cs->statusp = resp->status = puterrno4(error);
4228                 if (nm != name)
4229                         kmem_free(name, MAXPATHLEN + 1);
4230                 kmem_free(nm, len);
4231                 if (in_crit)
4232                         nbl_end_crit(vp);
4233                 VN_RELE(vp);
4234                 if (fp) {
4235                         rfs4_clear_dont_grant(fp);
4236                         rfs4_file_rele(fp);
4237                 }
4238                 goto out;
4239         }
4240         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4241 
4242         /* Actually do the REMOVE operation */
4243         if (vp->v_type == VDIR) {
4244                 /*
4245                  * Can't remove a directory that has a mounted-on filesystem.
4246                  */
4247                 if (vn_ismntpt(vp)) {
4248                         error = EACCES;
4249                 } else {
4250                         /*
4251                          * System V defines rmdir to return EEXIST,
4252                          * not ENOTEMPTY, if the directory is not
4253                          * empty.  A System V NFS server needs to map
4254                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4255                          * transmit over the wire.
4256                          */
4257                         if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4258                             NULL, 0)) == EEXIST)
4259                                 error = ENOTEMPTY;
4260                 }
4261         } else {
4262                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4263                     fp != NULL) {
4264                         struct vattr va;
4265                         vnode_t *tvp;
4266 
4267                         rfs4_dbe_lock(fp->rf_dbe);
4268                         tvp = fp->rf_vp;
4269                         if (tvp)
4270                                 VN_HOLD(tvp);
4271                         rfs4_dbe_unlock(fp->rf_dbe);
4272 
4273                         if (tvp) {
4274                                 /*
4275                                  * This is va_seq safe because we are not
4276                                  * manipulating dvp.
4277                                  */
4278                                 va.va_mask = AT_NLINK;
4279                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4280                                     va.va_nlink == 0) {
4281                                         /* Remove state on file remove */
4282                                         if (in_crit) {
4283                                                 nbl_end_crit(vp);
4284                                                 in_crit = 0;
4285                                         }
4286                                         rfs4_close_all_state(fp);
4287                                 }
4288                                 VN_RELE(tvp);
4289                         }
4290                 }
4291         }
4292 
4293         if (in_crit)
4294                 nbl_end_crit(vp);
4295         VN_RELE(vp);
4296 
4297         if (fp) {
4298                 rfs4_clear_dont_grant(fp);
4299                 rfs4_file_rele(fp);
4300         }
4301         if (nm != name)
4302                 kmem_free(name, MAXPATHLEN + 1);
4303         kmem_free(nm, len);
4304 
4305         if (error) {
4306                 *cs->statusp = resp->status = puterrno4(error);
4307                 goto out;
4308         }
4309 
4310         /*
4311          * Get the initial "after" sequence number, if it fails, set to zero
4312          */
4313         idva.va_mask = AT_SEQ;
4314         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4315                 idva.va_seq = 0;
4316 
4317         /*
4318          * Force modified data and metadata out to stable storage.
4319          */
4320         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4321 
4322         /*
4323          * Get "after" change value, if it fails, simply return the
4324          * before value.
4325          */
4326         adva.va_mask = AT_CTIME|AT_SEQ;
4327         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4328                 adva.va_ctime = bdva.va_ctime;
4329                 adva.va_seq = 0;
4330         }
4331 
4332         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4333 
4334         /*
4335          * The cinfo.atomic = TRUE only if we have
4336          * non-zero va_seq's, and it has incremented by exactly one
4337          * during the VOP_REMOVE/RMDIR and it didn't change during
4338          * the VOP_FSYNC.
4339          */
4340         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4341             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4342                 resp->cinfo.atomic = TRUE;
4343         else
4344                 resp->cinfo.atomic = FALSE;
4345 
4346         *cs->statusp = resp->status = NFS4_OK;
4347 
4348 out:
4349         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4350             REMOVE4res *, resp);
4351 }
4352 
4353 /*
4354  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4355  *              oldname and newname.
4356  *      res: status. If success - CURRENT_FH unchanged, return change_info
4357  *              for both from and target directories.
4358  */
4359 /* ARGSUSED */
4360 static void
4361 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4362     struct compound_state *cs)
4363 {
4364         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4365         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4366         int error;
4367         vnode_t *odvp;
4368         vnode_t *ndvp;
4369         vnode_t *srcvp, *targvp, *tvp;
4370         struct vattr obdva, oidva, oadva;
4371         struct vattr nbdva, nidva, nadva;
4372         char *onm, *nnm;
4373         uint_t olen, nlen;
4374         rfs4_file_t *fp, *sfp;
4375         int in_crit_src, in_crit_targ;
4376         int fp_rele_grant_hold, sfp_rele_grant_hold;
4377         int unlinked;
4378         bslabel_t *clabel;
4379         struct sockaddr *ca;
4380         char *converted_onm = NULL;
4381         char *converted_nnm = NULL;
4382         nfsstat4 status;
4383 
4384         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4385             RENAME4args *, args);
4386 
4387         fp = sfp = NULL;
4388         srcvp = targvp = tvp = NULL;
4389         in_crit_src = in_crit_targ = 0;
4390         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4391         unlinked = 0;
4392 
4393         /* CURRENT_FH: target directory */
4394         ndvp = cs->vp;
4395         if (ndvp == NULL) {
4396                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4397                 goto out;
4398         }
4399 
4400         /* SAVED_FH: from directory */
4401         odvp = cs->saved_vp;
4402         if (odvp == NULL) {
4403                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4404                 goto out;
4405         }
4406 
4407         if (cs->access == CS_ACCESS_DENIED) {
4408                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4409                 goto out;
4410         }
4411 
4412         /*
4413          * If there is an unshared filesystem mounted on this vnode,
4414          * do not allow to rename objects in this directory.
4415          */
4416         if (vn_ismntpt(odvp)) {
4417                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4418                 goto out;
4419         }
4420 
4421         /*
4422          * If there is an unshared filesystem mounted on this vnode,
4423          * do not allow to rename to this directory.
4424          */
4425         if (vn_ismntpt(ndvp)) {
4426                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4427                 goto out;
4428         }
4429 
4430         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4431                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4432                 goto out;
4433         }
4434 
4435         if (cs->saved_exi != cs->exi) {
4436                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4437                 goto out;
4438         }
4439 
4440         status = utf8_dir_verify(&args->oldname);
4441         if (status != NFS4_OK) {
4442                 *cs->statusp = resp->status = status;
4443                 goto out;
4444         }
4445 
4446         status = utf8_dir_verify(&args->newname);
4447         if (status != NFS4_OK) {
4448                 *cs->statusp = resp->status = status;
4449                 goto out;
4450         }
4451 
4452         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4453         if (onm == NULL) {
4454                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4455                 goto out;
4456         }
4457         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4458         nlen = MAXPATHLEN + 1;
4459         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4460             nlen);
4461 
4462         if (converted_onm == NULL) {
4463                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4464                 kmem_free(onm, olen);
4465                 goto out;
4466         }
4467 
4468         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4469         if (nnm == NULL) {
4470                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4471                 if (onm != converted_onm)
4472                         kmem_free(converted_onm, MAXPATHLEN + 1);
4473                 kmem_free(onm, olen);
4474                 goto out;
4475         }
4476         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4477             MAXPATHLEN  + 1);
4478 
4479         if (converted_nnm == NULL) {
4480                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4481                 kmem_free(nnm, nlen);
4482                 nnm = NULL;
4483                 if (onm != converted_onm)
4484                         kmem_free(converted_onm, MAXPATHLEN + 1);
4485                 kmem_free(onm, olen);
4486                 goto out;
4487         }
4488 
4489 
4490         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4491                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4492                 kmem_free(onm, olen);
4493                 kmem_free(nnm, nlen);
4494                 goto out;
4495         }
4496 
4497 
4498         if (rdonly4(req, cs)) {
4499                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4500                 if (onm != converted_onm)
4501                         kmem_free(converted_onm, MAXPATHLEN + 1);
4502                 kmem_free(onm, olen);
4503                 if (nnm != converted_nnm)
4504                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4505                 kmem_free(nnm, nlen);
4506                 goto out;
4507         }
4508 
4509         /* check label of the target dir */
4510         if (is_system_labeled()) {
4511                 ASSERT(req->rq_label != NULL);
4512                 clabel = req->rq_label;
4513                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4514                     "got client label from request(1)",
4515                     struct svc_req *, req);
4516                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4517                         if (!do_rfs_label_check(clabel, ndvp,
4518                             EQUALITY_CHECK, cs->exi)) {
4519                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4520                                 goto err_out;
4521                         }
4522                 }
4523         }
4524 
4525         /*
4526          * Is the source a file and have a delegation?
4527          * We don't need to acquire va_seq before these lookups, if
4528          * it causes an update, cinfo.before will not match, which will
4529          * trigger a cache flush even if atomic is TRUE.
4530          */
4531         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4532             &error, cs->cr)) {
4533                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4534                     NULL)) {
4535                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4536                         goto err_out;
4537                 }
4538         }
4539 
4540         if (srcvp == NULL) {
4541                 *cs->statusp = resp->status = puterrno4(error);
4542                 if (onm != converted_onm)
4543                         kmem_free(converted_onm, MAXPATHLEN + 1);
4544                 kmem_free(onm, olen);
4545                 if (nnm != converted_nnm)
4546                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4547                 kmem_free(nnm, nlen);
4548                 goto out;
4549         }
4550 
4551         sfp_rele_grant_hold = 1;
4552 
4553         /* Does the destination exist and a file and have a delegation? */
4554         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4555             NULL, cs->cr)) {
4556                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4557                     NULL)) {
4558                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4559                         goto err_out;
4560                 }
4561         }
4562         fp_rele_grant_hold = 1;
4563 
4564         /* Check for NBMAND lock on both source and target */
4565         if (nbl_need_check(srcvp)) {
4566                 nbl_start_crit(srcvp, RW_READER);
4567                 in_crit_src = 1;
4568                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4569                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4570                         goto err_out;
4571                 }
4572         }
4573 
4574         if (targvp && nbl_need_check(targvp)) {
4575                 nbl_start_crit(targvp, RW_READER);
4576                 in_crit_targ = 1;
4577                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4578                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4579                         goto err_out;
4580                 }
4581         }
4582 
4583         /* Get source "before" change value */
4584         obdva.va_mask = AT_CTIME|AT_SEQ;
4585         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4586         if (!error) {
4587                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4588                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4589         }
4590         if (error) {
4591                 *cs->statusp = resp->status = puterrno4(error);
4592                 goto err_out;
4593         }
4594 
4595         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4596         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4597 
4598         error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4599             NULL, 0);
4600 
4601         /*
4602          * If target existed and was unlinked by VOP_RENAME, state will need
4603          * closed. To avoid deadlock, rfs4_close_all_state will be done after
4604          * any necessary nbl_end_crit on srcvp and tgtvp.
4605          */
4606         if (error == 0 && fp != NULL) {
4607                 rfs4_dbe_lock(fp->rf_dbe);
4608                 tvp = fp->rf_vp;
4609                 if (tvp)
4610                         VN_HOLD(tvp);
4611                 rfs4_dbe_unlock(fp->rf_dbe);
4612 
4613                 if (tvp) {
4614                         struct vattr va;
4615                         va.va_mask = AT_NLINK;
4616 
4617                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4618                             va.va_nlink == 0) {
4619                                 unlinked = 1;
4620 
4621                                 /* DEBUG data */
4622                                 if ((srcvp == targvp) || (tvp != targvp)) {
4623                                         cmn_err(CE_WARN, "rfs4_op_rename: "
4624                                             "srcvp %p, targvp: %p, tvp: %p",
4625                                             (void *)srcvp, (void *)targvp,
4626                                             (void *)tvp);
4627                                 }
4628                         } else {
4629                                 VN_RELE(tvp);
4630                         }
4631                 }
4632         }
4633         if (error == 0)
4634                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4635 
4636         if (in_crit_src)
4637                 nbl_end_crit(srcvp);
4638         if (srcvp)
4639                 VN_RELE(srcvp);
4640         if (in_crit_targ)
4641                 nbl_end_crit(targvp);
4642         if (targvp)
4643                 VN_RELE(targvp);
4644 
4645         if (unlinked) {
4646                 ASSERT(fp != NULL);
4647                 ASSERT(tvp != NULL);
4648 
4649                 /* DEBUG data */
4650                 if (RW_READ_HELD(&tvp->v_nbllock)) {
4651                         cmn_err(CE_WARN, "rfs4_op_rename: "
4652                             "RW_READ_HELD(%p)", (void *)tvp);
4653                 }
4654 
4655                 /* The file is gone and so should the state */
4656                 rfs4_close_all_state(fp);
4657                 VN_RELE(tvp);
4658         }
4659 
4660         if (sfp) {
4661                 rfs4_clear_dont_grant(sfp);
4662                 rfs4_file_rele(sfp);
4663         }
4664         if (fp) {
4665                 rfs4_clear_dont_grant(fp);
4666                 rfs4_file_rele(fp);
4667         }
4668 
4669         if (converted_onm != onm)
4670                 kmem_free(converted_onm, MAXPATHLEN + 1);
4671         kmem_free(onm, olen);
4672         if (converted_nnm != nnm)
4673                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4674         kmem_free(nnm, nlen);
4675 
4676         /*
4677          * Get the initial "after" sequence number, if it fails, set to zero
4678          */
4679         oidva.va_mask = AT_SEQ;
4680         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4681                 oidva.va_seq = 0;
4682 
4683         nidva.va_mask = AT_SEQ;
4684         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4685                 nidva.va_seq = 0;
4686 
4687         /*
4688          * Force modified data and metadata out to stable storage.
4689          */
4690         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4691         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4692 
4693         if (error) {
4694                 *cs->statusp = resp->status = puterrno4(error);
4695                 goto out;
4696         }
4697 
4698         /*
4699          * Get "after" change values, if it fails, simply return the
4700          * before value.
4701          */
4702         oadva.va_mask = AT_CTIME|AT_SEQ;
4703         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4704                 oadva.va_ctime = obdva.va_ctime;
4705                 oadva.va_seq = 0;
4706         }
4707 
4708         nadva.va_mask = AT_CTIME|AT_SEQ;
4709         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4710                 nadva.va_ctime = nbdva.va_ctime;
4711                 nadva.va_seq = 0;
4712         }
4713 
4714         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4715         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4716 
4717         /*
4718          * The cinfo.atomic = TRUE only if we have
4719          * non-zero va_seq's, and it has incremented by exactly one
4720          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4721          */
4722         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4723             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4724                 resp->source_cinfo.atomic = TRUE;
4725         else
4726                 resp->source_cinfo.atomic = FALSE;
4727 
4728         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4729             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4730                 resp->target_cinfo.atomic = TRUE;
4731         else
4732                 resp->target_cinfo.atomic = FALSE;
4733 
4734 #ifdef  VOLATILE_FH_TEST
4735         {
4736         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4737 
4738         /*
4739          * Add the renamed file handle to the volatile rename list
4740          */
4741         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4742                 /* file handles may expire on rename */
4743                 vnode_t *vp;
4744 
4745                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4746                 /*
4747                  * Already know that nnm will be a valid string
4748                  */
4749                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4750                     NULL, NULL, NULL);
4751                 kmem_free(nnm, nlen);
4752                 if (!error) {
4753                         add_volrnm_fh(cs->exi, vp);
4754                         VN_RELE(vp);
4755                 }
4756         }
4757         }
4758 #endif  /* VOLATILE_FH_TEST */
4759 
4760         *cs->statusp = resp->status = NFS4_OK;
4761 out:
4762         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4763             RENAME4res *, resp);
4764         return;
4765 
4766 err_out:
4767         if (onm != converted_onm)
4768                 kmem_free(converted_onm, MAXPATHLEN + 1);
4769         if (onm != NULL)
4770                 kmem_free(onm, olen);
4771         if (nnm != converted_nnm)
4772                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4773         if (nnm != NULL)
4774                 kmem_free(nnm, nlen);
4775 
4776         if (in_crit_src) nbl_end_crit(srcvp);
4777         if (in_crit_targ) nbl_end_crit(targvp);
4778         if (targvp) VN_RELE(targvp);
4779         if (srcvp) VN_RELE(srcvp);
4780         if (sfp) {
4781                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4782                 rfs4_file_rele(sfp);
4783         }
4784         if (fp) {
4785                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4786                 rfs4_file_rele(fp);
4787         }
4788 
4789         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4790             RENAME4res *, resp);
4791 }
4792 
4793 /* ARGSUSED */
4794 static void
4795 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4796     struct compound_state *cs)
4797 {
4798         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4799         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4800         rfs4_client_t *cp;
4801 
4802         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4803             RENEW4args *, args);
4804 
4805         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4806                 *cs->statusp = resp->status =
4807                     rfs4_check_clientid(&args->clientid, 0);
4808                 goto out;
4809         }
4810 
4811         if (rfs4_lease_expired(cp)) {
4812                 rfs4_client_rele(cp);
4813                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4814                 goto out;
4815         }
4816 
4817         rfs4_update_lease(cp);
4818 
4819         mutex_enter(cp->rc_cbinfo.cb_lock);
4820         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4821                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4822                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4823         } else {
4824                 *cs->statusp = resp->status = NFS4_OK;
4825         }
4826         mutex_exit(cp->rc_cbinfo.cb_lock);
4827 
4828         rfs4_client_rele(cp);
4829 
4830 out:
4831         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4832             RENEW4res *, resp);
4833 }
4834 
4835 /* ARGSUSED */
4836 static void
4837 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4838     struct compound_state *cs)
4839 {
4840         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4841 
4842         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4843 
4844         /* No need to check cs->access - we are not accessing any object */
4845         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4846                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4847                 goto out;
4848         }
4849         if (cs->vp != NULL) {
4850                 VN_RELE(cs->vp);
4851         }
4852         cs->vp = cs->saved_vp;
4853         cs->saved_vp = NULL;
4854         cs->exi = cs->saved_exi;
4855         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4856         *cs->statusp = resp->status = NFS4_OK;
4857         cs->deleg = FALSE;
4858 
4859 out:
4860         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4861             RESTOREFH4res *, resp);
4862 }
4863 
4864 /* ARGSUSED */
4865 static void
4866 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4867     struct compound_state *cs)
4868 {
4869         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4870 
4871         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4872 
4873         /* No need to check cs->access - we are not accessing any object */
4874         if (cs->vp == NULL) {
4875                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4876                 goto out;
4877         }
4878         if (cs->saved_vp != NULL) {
4879                 VN_RELE(cs->saved_vp);
4880         }
4881         cs->saved_vp = cs->vp;
4882         VN_HOLD(cs->saved_vp);
4883         cs->saved_exi = cs->exi;
4884         /*
4885          * since SAVEFH is fairly rare, don't alloc space for its fh
4886          * unless necessary.
4887          */
4888         if (cs->saved_fh.nfs_fh4_val == NULL) {
4889                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4890         }
4891         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4892         *cs->statusp = resp->status = NFS4_OK;
4893 
4894 out:
4895         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4896             SAVEFH4res *, resp);
4897 }
4898 
4899 /*
4900  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4901  * return the bitmap of attrs that were set successfully. It is also
4902  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4903  * always be called only after rfs4_do_set_attrs().
4904  *
4905  * Verify that the attributes are same as the expected ones. sargp->vap
4906  * and sargp->sbp contain the input attributes as translated from fattr4.
4907  *
4908  * This function verifies only the attrs that correspond to a vattr or
4909  * vfsstat struct. That is because of the extra step needed to get the
4910  * corresponding system structs. Other attributes have already been set or
4911  * verified by do_rfs4_set_attrs.
4912  *
4913  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4914  */
4915 static int
4916 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4917     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4918 {
4919         int error, ret_error = 0;
4920         int i, k;
4921         uint_t sva_mask = sargp->vap->va_mask;
4922         uint_t vbit;
4923         union nfs4_attr_u *na;
4924         uint8_t *amap;
4925         bool_t getsb = ntovp->vfsstat;
4926 
4927         if (sva_mask != 0) {
4928                 /*
4929                  * Okay to overwrite sargp->vap because we verify based
4930                  * on the incoming values.
4931                  */
4932                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4933                     sargp->cs->cr, NULL);
4934                 if (ret_error) {
4935                         if (resp == NULL)
4936                                 return (ret_error);
4937                         /*
4938                          * Must return bitmap of successful attrs
4939                          */
4940                         sva_mask = 0;   /* to prevent checking vap later */
4941                 } else {
4942                         /*
4943                          * Some file systems clobber va_mask. it is probably
4944                          * wrong of them to do so, nonethless we practice
4945                          * defensive coding.
4946                          * See bug id 4276830.
4947                          */
4948                         sargp->vap->va_mask = sva_mask;
4949                 }
4950         }
4951 
4952         if (getsb) {
4953                 /*
4954                  * Now get the superblock and loop on the bitmap, as there is
4955                  * no simple way of translating from superblock to bitmap4.
4956                  */
4957                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4958                 if (ret_error) {
4959                         if (resp == NULL)
4960                                 goto errout;
4961                         getsb = FALSE;
4962                 }
4963         }
4964 
4965         /*
4966          * Now loop and verify each attribute which getattr returned
4967          * whether it's the same as the input.
4968          */
4969         if (resp == NULL && !getsb && (sva_mask == 0))
4970                 goto errout;
4971 
4972         na = ntovp->na;
4973         amap = ntovp->amap;
4974         k = 0;
4975         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4976                 k = *amap;
4977                 ASSERT(nfs4_ntov_map[k].nval == k);
4978                 vbit = nfs4_ntov_map[k].vbit;
4979 
4980                 /*
4981                  * If vattr attribute but VOP_GETATTR failed, or it's
4982                  * superblock attribute but VFS_STATVFS failed, skip
4983                  */
4984                 if (vbit) {
4985                         if ((vbit & sva_mask) == 0)
4986                                 continue;
4987                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4988                         continue;
4989                 }
4990                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4991                 if (resp != NULL) {
4992                         if (error)
4993                                 ret_error = -1; /* not all match */
4994                         else    /* update response bitmap */
4995                                 *resp |= nfs4_ntov_map[k].fbit;
4996                         continue;
4997                 }
4998                 if (error) {
4999                         ret_error = -1; /* not all match */
5000                         break;
5001                 }
5002         }
5003 errout:
5004         return (ret_error);
5005 }
5006 
5007 /*
5008  * Decode the attribute to be set/verified. If the attr requires a sys op
5009  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5010  * call the sv_getit function for it, because the sys op hasn't yet been done.
5011  * Return 0 for success, error code if failed.
5012  *
5013  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5014  */
5015 static int
5016 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5017     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5018 {
5019         int error = 0;
5020         bool_t set_later;
5021 
5022         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5023 
5024         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5025                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5026                 /*
5027                  * don't verify yet if a vattr or sb dependent attr,
5028                  * because we don't have their sys values yet.
5029                  * Will be done later.
5030                  */
5031                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5032                         /*
5033                          * ACLs are a special case, since setting the MODE
5034                          * conflicts with setting the ACL.  We delay setting
5035                          * the ACL until all other attributes have been set.
5036                          * The ACL gets set in do_rfs4_op_setattr().
5037                          */
5038                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5039                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5040                                     sargp, nap);
5041                                 if (error) {
5042                                         xdr_free(nfs4_ntov_map[k].xfunc,
5043                                             (caddr_t)nap);
5044                                 }
5045                         }
5046                 }
5047         } else {
5048 #ifdef  DEBUG
5049                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5050                     "decoding attribute %d\n", k);
5051 #endif
5052                 error = EINVAL;
5053         }
5054         if (!error && resp_bval && !set_later) {
5055                 *resp_bval |= nfs4_ntov_map[k].fbit;
5056         }
5057 
5058         return (error);
5059 }
5060 
5061 /*
5062  * Set vattr based on incoming fattr4 attrs - used by setattr.
5063  * Set response mask. Ignore any values that are not writable vattr attrs.
5064  */
5065 static nfsstat4
5066 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5067     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5068     nfs4_attr_cmd_t cmd)
5069 {
5070         int error = 0;
5071         int i;
5072         char *attrs = fattrp->attrlist4;
5073         uint32_t attrslen = fattrp->attrlist4_len;
5074         XDR xdr;
5075         nfsstat4 status = NFS4_OK;
5076         vnode_t *vp = cs->vp;
5077         union nfs4_attr_u *na;
5078         uint8_t *amap;
5079 
5080 #ifndef lint
5081         /*
5082          * Make sure that maximum attribute number can be expressed as an
5083          * 8 bit quantity.
5084          */
5085         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5086 #endif
5087 
5088         if (vp == NULL) {
5089                 if (resp)
5090                         *resp = 0;
5091                 return (NFS4ERR_NOFILEHANDLE);
5092         }
5093         if (cs->access == CS_ACCESS_DENIED) {
5094                 if (resp)
5095                         *resp = 0;
5096                 return (NFS4ERR_ACCESS);
5097         }
5098 
5099         sargp->op = cmd;
5100         sargp->cs = cs;
5101         sargp->flag = 0;     /* may be set later */
5102         sargp->vap->va_mask = 0;
5103         sargp->rdattr_error = NFS4_OK;
5104         sargp->rdattr_error_req = FALSE;
5105         /* sargp->sbp is set by the caller */
5106 
5107         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5108 
5109         na = ntovp->na;
5110         amap = ntovp->amap;
5111 
5112         /*
5113          * The following loop iterates on the nfs4_ntov_map checking
5114          * if the fbit is set in the requested bitmap.
5115          * If set then we process the arguments using the
5116          * rfs4_fattr4 conversion functions to populate the setattr
5117          * vattr and va_mask. Any settable attrs that are not using vattr
5118          * will be set in this loop.
5119          */
5120         for (i = 0; i < nfs4_ntov_map_size; i++) {
5121                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5122                         continue;
5123                 }
5124                 /*
5125                  * If setattr, must be a writable attr.
5126                  * If verify/nverify, must be a readable attr.
5127                  */
5128                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5129                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5130                         /*
5131                          * Client tries to set/verify an
5132                          * unsupported attribute, tries to set
5133                          * a read only attr or verify a write
5134                          * only one - error!
5135                          */
5136                         break;
5137                 }
5138                 /*
5139                  * Decode the attribute to set/verify
5140                  */
5141                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5142                     &xdr, resp ? resp : NULL, na);
5143                 if (error)
5144                         break;
5145                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5146                 na++;
5147                 (ntovp->attrcnt)++;
5148                 if (nfs4_ntov_map[i].vfsstat)
5149                         ntovp->vfsstat = TRUE;
5150         }
5151 
5152         if (error != 0)
5153                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5154                     puterrno4(error));
5155         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5156         return (status);
5157 }
5158 
5159 static nfsstat4
5160 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5161     stateid4 *stateid)
5162 {
5163         int error = 0;
5164         struct nfs4_svgetit_arg sarg;
5165         bool_t trunc;
5166 
5167         nfsstat4 status = NFS4_OK;
5168         cred_t *cr = cs->cr;
5169         vnode_t *vp = cs->vp;
5170         struct nfs4_ntov_table ntov;
5171         struct statvfs64 sb;
5172         struct vattr bva;
5173         struct flock64 bf;
5174         int in_crit = 0;
5175         uint_t saved_mask = 0;
5176         caller_context_t ct;
5177 
5178         *resp = 0;
5179         sarg.sbp = &sb;
5180         sarg.is_referral = B_FALSE;
5181         nfs4_ntov_table_init(&ntov);
5182         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5183             NFS4ATTR_SETIT);
5184         if (status != NFS4_OK) {
5185                 /*
5186                  * failed set attrs
5187                  */
5188                 goto done;
5189         }
5190         if ((sarg.vap->va_mask == 0) &&
5191             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5192                 /*
5193                  * no further work to be done
5194                  */
5195                 goto done;
5196         }
5197 
5198         /*
5199          * If we got a request to set the ACL and the MODE, only
5200          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5201          * to change any other bits, along with setting an ACL,
5202          * gives NFS4ERR_INVAL.
5203          */
5204         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5205             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5206                 vattr_t va;
5207 
5208                 va.va_mask = AT_MODE;
5209                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5210                 if (error) {
5211                         status = puterrno4(error);
5212                         goto done;
5213                 }
5214                 if ((sarg.vap->va_mode ^ va.va_mode) &
5215                     ~(VSUID | VSGID | VSVTX)) {
5216                         status = NFS4ERR_INVAL;
5217                         goto done;
5218                 }
5219         }
5220 
5221         /* Check stateid only if size has been set */
5222         if (sarg.vap->va_mask & AT_SIZE) {
5223                 trunc = (sarg.vap->va_size == 0);
5224                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5225                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5226                 if (status != NFS4_OK)
5227                         goto done;
5228         } else {
5229                 ct.cc_sysid = 0;
5230                 ct.cc_pid = 0;
5231                 ct.cc_caller_id = nfs4_srv_caller_id;
5232                 ct.cc_flags = CC_DONTBLOCK;
5233         }
5234 
5235         /* XXX start of possible race with delegations */
5236 
5237         /*
5238          * We need to specially handle size changes because it is
5239          * possible for the client to create a file with read-only
5240          * modes, but with the file opened for writing. If the client
5241          * then tries to set the file size, e.g. ftruncate(3C),
5242          * fcntl(F_FREESP), the normal access checking done in
5243          * VOP_SETATTR would prevent the client from doing it even though
5244          * it should be allowed to do so.  To get around this, we do the
5245          * access checking for ourselves and use VOP_SPACE which doesn't
5246          * do the access checking.
5247          * Also the client should not be allowed to change the file
5248          * size if there is a conflicting non-blocking mandatory lock in
5249          * the region of the change.
5250          */
5251         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5252                 u_offset_t offset;
5253                 ssize_t length;
5254 
5255                 /*
5256                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5257                  * before returning, sarg.vap->va_mask is used to
5258                  * generate the setattr reply bitmap.  We also clear
5259                  * AT_SIZE below before calling VOP_SPACE.  For both
5260                  * of these cases, the va_mask needs to be saved here
5261                  * and restored after calling VOP_SETATTR.
5262                  */
5263                 saved_mask = sarg.vap->va_mask;
5264 
5265                 /*
5266                  * Check any possible conflict due to NBMAND locks.
5267                  * Get into critical region before VOP_GETATTR, so the
5268                  * size attribute is valid when checking conflicts.
5269                  */
5270                 if (nbl_need_check(vp)) {
5271                         nbl_start_crit(vp, RW_READER);
5272                         in_crit = 1;
5273                 }
5274 
5275                 bva.va_mask = AT_UID|AT_SIZE;
5276                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5277                         status = puterrno4(error);
5278                         goto done;
5279                 }
5280 
5281                 if (in_crit) {
5282                         if (sarg.vap->va_size < bva.va_size) {
5283                                 offset = sarg.vap->va_size;
5284                                 length = bva.va_size - sarg.vap->va_size;
5285                         } else {
5286                                 offset = bva.va_size;
5287                                 length = sarg.vap->va_size - bva.va_size;
5288                         }
5289                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5290                             &ct)) {
5291                                 status = NFS4ERR_LOCKED;
5292                                 goto done;
5293                         }
5294                 }
5295 
5296                 if (crgetuid(cr) == bva.va_uid) {
5297                         sarg.vap->va_mask &= ~AT_SIZE;
5298                         bf.l_type = F_WRLCK;
5299                         bf.l_whence = 0;
5300                         bf.l_start = (off64_t)sarg.vap->va_size;
5301                         bf.l_len = 0;
5302                         bf.l_sysid = 0;
5303                         bf.l_pid = 0;
5304                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5305                             (offset_t)sarg.vap->va_size, cr, &ct);
5306                 }
5307         }
5308 
5309         if (!error && sarg.vap->va_mask != 0)
5310                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5311 
5312         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5313         if (saved_mask & AT_SIZE)
5314                 sarg.vap->va_mask |= AT_SIZE;
5315 
5316         /*
5317          * If an ACL was being set, it has been delayed until now,
5318          * in order to set the mode (via the VOP_SETATTR() above) first.
5319          */
5320         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5321                 int i;
5322 
5323                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5324                         if (ntov.amap[i] == FATTR4_ACL)
5325                                 break;
5326                 if (i < NFS4_MAXNUM_ATTRS) {
5327                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5328                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5329                         if (error == 0) {
5330                                 *resp |= FATTR4_ACL_MASK;
5331                         } else if (error == ENOTSUP) {
5332                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5333                                 status = NFS4ERR_ATTRNOTSUPP;
5334                                 goto done;
5335                         }
5336                 } else {
5337                         NFS4_DEBUG(rfs4_debug,
5338                             (CE_NOTE, "do_rfs4_op_setattr: "
5339                             "unable to find ACL in fattr4"));
5340                         error = EINVAL;
5341                 }
5342         }
5343 
5344         if (error) {
5345                 /* check if a monitor detected a delegation conflict */
5346                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5347                         status = NFS4ERR_DELAY;
5348                 else
5349                         status = puterrno4(error);
5350 
5351                 /*
5352                  * Set the response bitmap when setattr failed.
5353                  * If VOP_SETATTR partially succeeded, test by doing a
5354                  * VOP_GETATTR on the object and comparing the data
5355                  * to the setattr arguments.
5356                  */
5357                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5358         } else {
5359                 /*
5360                  * Force modified metadata out to stable storage.
5361                  */
5362                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5363                 /*
5364                  * Set response bitmap
5365                  */
5366                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5367         }
5368 
5369 /* Return early and already have a NFSv4 error */
5370 done:
5371         /*
5372          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5373          * conversion sets both readable and writeable NFS4 attrs
5374          * for AT_MTIME and AT_ATIME.  The line below masks out
5375          * unrequested attrs from the setattr result bitmap.  This
5376          * is placed after the done: label to catch the ATTRNOTSUP
5377          * case.
5378          */
5379         *resp &= fattrp->attrmask;
5380 
5381         if (in_crit)
5382                 nbl_end_crit(vp);
5383 
5384         nfs4_ntov_table_free(&ntov, &sarg);
5385 
5386         return (status);
5387 }
5388 
5389 /* ARGSUSED */
5390 static void
5391 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5392     struct compound_state *cs)
5393 {
5394         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5395         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5396         bslabel_t *clabel;
5397 
5398         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5399             SETATTR4args *, args);
5400 
5401         if (cs->vp == NULL) {
5402                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5403                 goto out;
5404         }
5405 
5406         /*
5407          * If there is an unshared filesystem mounted on this vnode,
5408          * do not allow to setattr on this vnode.
5409          */
5410         if (vn_ismntpt(cs->vp)) {
5411                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5412                 goto out;
5413         }
5414 
5415         resp->attrsset = 0;
5416 
5417         if (rdonly4(req, cs)) {
5418                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5419                 goto out;
5420         }
5421 
5422         /* check label before setting attributes */
5423         if (is_system_labeled()) {
5424                 ASSERT(req->rq_label != NULL);
5425                 clabel = req->rq_label;
5426                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5427                     "got client label from request(1)",
5428                     struct svc_req *, req);
5429                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5430                         if (!do_rfs_label_check(clabel, cs->vp,
5431                             EQUALITY_CHECK, cs->exi)) {
5432                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5433                                 goto out;
5434                         }
5435                 }
5436         }
5437 
5438         *cs->statusp = resp->status =
5439             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5440             &args->stateid);
5441 
5442 out:
5443         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5444             SETATTR4res *, resp);
5445 }
5446 
5447 /* ARGSUSED */
5448 static void
5449 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5450     struct compound_state *cs)
5451 {
5452         /*
5453          * verify and nverify are exactly the same, except that nverify
5454          * succeeds when some argument changed, and verify succeeds when
5455          * when none changed.
5456          */
5457 
5458         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5459         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5460 
5461         int error;
5462         struct nfs4_svgetit_arg sarg;
5463         struct statvfs64 sb;
5464         struct nfs4_ntov_table ntov;
5465 
5466         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5467             VERIFY4args *, args);
5468 
5469         if (cs->vp == NULL) {
5470                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5471                 goto out;
5472         }
5473 
5474         sarg.sbp = &sb;
5475         sarg.is_referral = B_FALSE;
5476         nfs4_ntov_table_init(&ntov);
5477         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5478             &sarg, &ntov, NFS4ATTR_VERIT);
5479         if (resp->status != NFS4_OK) {
5480                 /*
5481                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5482                  * so could return -1 for "no match".
5483                  */
5484                 if (resp->status == -1)
5485                         resp->status = NFS4ERR_NOT_SAME;
5486                 goto done;
5487         }
5488         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5489         switch (error) {
5490         case 0:
5491                 resp->status = NFS4_OK;
5492                 break;
5493         case -1:
5494                 resp->status = NFS4ERR_NOT_SAME;
5495                 break;
5496         default:
5497                 resp->status = puterrno4(error);
5498                 break;
5499         }
5500 done:
5501         *cs->statusp = resp->status;
5502         nfs4_ntov_table_free(&ntov, &sarg);
5503 out:
5504         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5505             VERIFY4res *, resp);
5506 }
5507 
5508 /* ARGSUSED */
5509 static void
5510 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5511     struct compound_state *cs)
5512 {
5513         /*
5514          * verify and nverify are exactly the same, except that nverify
5515          * succeeds when some argument changed, and verify succeeds when
5516          * when none changed.
5517          */
5518 
5519         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5520         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5521 
5522         int error;
5523         struct nfs4_svgetit_arg sarg;
5524         struct statvfs64 sb;
5525         struct nfs4_ntov_table ntov;
5526 
5527         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5528             NVERIFY4args *, args);
5529 
5530         if (cs->vp == NULL) {
5531                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5532                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5533                     NVERIFY4res *, resp);
5534                 return;
5535         }
5536         sarg.sbp = &sb;
5537         sarg.is_referral = B_FALSE;
5538         nfs4_ntov_table_init(&ntov);
5539         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5540             &sarg, &ntov, NFS4ATTR_VERIT);
5541         if (resp->status != NFS4_OK) {
5542                 /*
5543                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5544                  * so could return -1 for "no match".
5545                  */
5546                 if (resp->status == -1)
5547                         resp->status = NFS4_OK;
5548                 goto done;
5549         }
5550         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5551         switch (error) {
5552         case 0:
5553                 resp->status = NFS4ERR_SAME;
5554                 break;
5555         case -1:
5556                 resp->status = NFS4_OK;
5557                 break;
5558         default:
5559                 resp->status = puterrno4(error);
5560                 break;
5561         }
5562 done:
5563         *cs->statusp = resp->status;
5564         nfs4_ntov_table_free(&ntov, &sarg);
5565 
5566         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5567             NVERIFY4res *, resp);
5568 }
5569 
5570 /*
5571  * XXX - This should live in an NFS header file.
5572  */
5573 #define MAX_IOVECS      12
5574 
5575 /* ARGSUSED */
5576 static void
5577 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5578     struct compound_state *cs)
5579 {
5580         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5581         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5582         int error;
5583         vnode_t *vp;
5584         struct vattr bva;
5585         u_offset_t rlimit;
5586         struct uio uio;
5587         struct iovec iov[MAX_IOVECS];
5588         struct iovec *iovp;
5589         int iovcnt;
5590         int ioflag;
5591         cred_t *savecred, *cr;
5592         bool_t *deleg = &cs->deleg;
5593         nfsstat4 stat;
5594         int in_crit = 0;
5595         caller_context_t ct;
5596         nfs4_srv_t *nsrv4;
5597 
5598         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5599             WRITE4args *, args);
5600 
5601         vp = cs->vp;
5602         if (vp == NULL) {
5603                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5604                 goto out;
5605         }
5606         if (cs->access == CS_ACCESS_DENIED) {
5607                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5608                 goto out;
5609         }
5610 
5611         cr = cs->cr;
5612 
5613         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5614             deleg, TRUE, &ct)) != NFS4_OK) {
5615                 *cs->statusp = resp->status = stat;
5616                 goto out;
5617         }
5618 
5619         /*
5620          * We have to enter the critical region before calling VOP_RWLOCK
5621          * to avoid a deadlock with ufs.
5622          */
5623         if (nbl_need_check(vp)) {
5624                 nbl_start_crit(vp, RW_READER);
5625                 in_crit = 1;
5626                 if (nbl_conflict(vp, NBL_WRITE,
5627                     args->offset, args->data_len, 0, &ct)) {
5628                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5629                         goto out;
5630                 }
5631         }
5632 
5633         bva.va_mask = AT_MODE | AT_UID;
5634         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5635 
5636         /*
5637          * If we can't get the attributes, then we can't do the
5638          * right access checking.  So, we'll fail the request.
5639          */
5640         if (error) {
5641                 *cs->statusp = resp->status = puterrno4(error);
5642                 goto out;
5643         }
5644 
5645         if (rdonly4(req, cs)) {
5646                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5647                 goto out;
5648         }
5649 
5650         if (vp->v_type != VREG) {
5651                 *cs->statusp = resp->status =
5652                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5653                 goto out;
5654         }
5655 
5656         if (crgetuid(cr) != bva.va_uid &&
5657             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5658                 *cs->statusp = resp->status = puterrno4(error);
5659                 goto out;
5660         }
5661 
5662         if (MANDLOCK(vp, bva.va_mode)) {
5663                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5664                 goto out;
5665         }
5666 
5667         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5668         if (args->data_len == 0) {
5669                 *cs->statusp = resp->status = NFS4_OK;
5670                 resp->count = 0;
5671                 resp->committed = args->stable;
5672                 resp->writeverf = nsrv4->write4verf;
5673                 goto out;
5674         }
5675 
5676         if (args->mblk != NULL) {
5677                 mblk_t *m;
5678                 uint_t bytes, round_len;
5679 
5680                 iovcnt = 0;
5681                 bytes = 0;
5682                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5683                 for (m = args->mblk;
5684                     m != NULL && bytes < round_len;
5685                     m = m->b_cont) {
5686                         iovcnt++;
5687                         bytes += MBLKL(m);
5688                 }
5689 #ifdef DEBUG
5690                 /* should have ended on an mblk boundary */
5691                 if (bytes != round_len) {
5692                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5693                             bytes, round_len, args->data_len);
5694                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5695                             (void *)args->mblk, (void *)m);
5696                         ASSERT(bytes == round_len);
5697                 }
5698 #endif
5699                 if (iovcnt <= MAX_IOVECS) {
5700                         iovp = iov;
5701                 } else {
5702                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5703                 }
5704                 mblk_to_iov(args->mblk, iovcnt, iovp);
5705         } else if (args->rlist != NULL) {
5706                 iovcnt = 1;
5707                 iovp = iov;
5708                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5709                 iovp->iov_len = args->data_len;
5710         } else {
5711                 iovcnt = 1;
5712                 iovp = iov;
5713                 iovp->iov_base = args->data_val;
5714                 iovp->iov_len = args->data_len;
5715         }
5716 
5717         uio.uio_iov = iovp;
5718         uio.uio_iovcnt = iovcnt;
5719 
5720         uio.uio_segflg = UIO_SYSSPACE;
5721         uio.uio_extflg = UIO_COPY_DEFAULT;
5722         uio.uio_loffset = args->offset;
5723         uio.uio_resid = args->data_len;
5724         uio.uio_llimit = curproc->p_fsz_ctl;
5725         rlimit = uio.uio_llimit - args->offset;
5726         if (rlimit < (u_offset_t)uio.uio_resid)
5727                 uio.uio_resid = (int)rlimit;
5728 
5729         if (args->stable == UNSTABLE4)
5730                 ioflag = 0;
5731         else if (args->stable == FILE_SYNC4)
5732                 ioflag = FSYNC;
5733         else if (args->stable == DATA_SYNC4)
5734                 ioflag = FDSYNC;
5735         else {
5736                 if (iovp != iov)
5737                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5738                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5739                 goto out;
5740         }
5741 
5742         /*
5743          * We're changing creds because VM may fault and we need
5744          * the cred of the current thread to be used if quota
5745          * checking is enabled.
5746          */
5747         savecred = curthread->t_cred;
5748         curthread->t_cred = cr;
5749         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5750         curthread->t_cred = savecred;
5751 
5752         if (iovp != iov)
5753                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5754 
5755         if (error) {
5756                 *cs->statusp = resp->status = puterrno4(error);
5757                 goto out;
5758         }
5759 
5760         *cs->statusp = resp->status = NFS4_OK;
5761         resp->count = args->data_len - uio.uio_resid;
5762 
5763         if (ioflag == 0)
5764                 resp->committed = UNSTABLE4;
5765         else
5766                 resp->committed = FILE_SYNC4;
5767 
5768         resp->writeverf = nsrv4->write4verf;
5769 
5770 out:
5771         if (in_crit)
5772                 nbl_end_crit(vp);
5773 
5774         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5775             WRITE4res *, resp);
5776 }
5777 
5778 
5779 /* XXX put in a header file */
5780 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5781 
5782 void
5783 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5784     struct svc_req *req, cred_t *cr, int *rv)
5785 {
5786         uint_t i;
5787         struct compound_state cs;
5788         nfs4_srv_t *nsrv4;
5789         nfs_export_t *ne = nfs_get_export();
5790 
5791         if (rv != NULL)
5792                 *rv = 0;
5793         rfs4_init_compound_state(&cs);
5794         /*
5795          * Form a reply tag by copying over the reqeuest tag.
5796          */
5797         resp->tag.utf8string_val =
5798             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5799         resp->tag.utf8string_len = args->tag.utf8string_len;
5800         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5801             resp->tag.utf8string_len);
5802 
5803         cs.statusp = &resp->status;
5804         cs.req = req;
5805         resp->array = NULL;
5806         resp->array_len = 0;
5807 
5808         /*
5809          * XXX for now, minorversion should be zero
5810          */
5811         if (args->minorversion != NFS4_MINORVERSION) {
5812                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5813                     &cs, COMPOUND4args *, args);
5814                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5815                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5816                     &cs, COMPOUND4res *, resp);
5817                 return;
5818         }
5819 
5820         if (args->array_len == 0) {
5821                 resp->status = NFS4_OK;
5822                 return;
5823         }
5824 
5825         ASSERT(exi == NULL);
5826         ASSERT(cr == NULL);
5827 
5828         cr = crget();
5829         ASSERT(cr != NULL);
5830 
5831         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5832                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5833                     &cs, COMPOUND4args *, args);
5834                 crfree(cr);
5835                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5836                     &cs, COMPOUND4res *, resp);
5837                 svcerr_badcred(req->rq_xprt);
5838                 if (rv != NULL)
5839                         *rv = 1;
5840                 return;
5841         }
5842         resp->array_len = args->array_len;
5843         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5844             KM_SLEEP);
5845 
5846         cs.basecr = cr;
5847         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5848 
5849         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5850             COMPOUND4args *, args);
5851 
5852         /*
5853          * For now, NFS4 compound processing must be protected by
5854          * exported_lock because it can access more than one exportinfo
5855          * per compound and share/unshare can now change multiple
5856          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5857          * per proc (excluding public exinfo), and exi_count design
5858          * is sufficient to protect concurrent execution of NFS2/3
5859          * ops along with unexport.  This lock will be removed as
5860          * part of the NFSv4 phase 2 namespace redesign work.
5861          */
5862         rw_enter(&ne->exported_lock, RW_READER);
5863 
5864         /*
5865          * If this is the first compound we've seen, we need to start all
5866          * new instances' grace periods.
5867          */
5868         if (nsrv4->seen_first_compound == 0) {
5869                 rfs4_grace_start_new(nsrv4);
5870                 /*
5871                  * This must be set after rfs4_grace_start_new(), otherwise
5872                  * another thread could proceed past here before the former
5873                  * is finished.
5874                  */
5875                 nsrv4->seen_first_compound = 1;
5876         }
5877 
5878         for (i = 0; i < args->array_len && cs.cont; i++) {
5879                 nfs_argop4 *argop;
5880                 nfs_resop4 *resop;
5881                 uint_t op;
5882 
5883                 argop = &args->array[i];
5884                 resop = &resp->array[i];
5885                 resop->resop = argop->argop;
5886                 op = (uint_t)resop->resop;
5887 
5888                 if (op < rfsv4disp_cnt) {
5889                         /*
5890                          * Count the individual ops here; NULL and COMPOUND
5891                          * are counted in common_dispatch()
5892                          */
5893                         rfsproccnt_v4_ptr[op].value.ui64++;
5894 
5895                         NFS4_DEBUG(rfs4_debug > 1,
5896                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5897                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5898                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5899                             rfs4_op_string[op], *cs.statusp));
5900                         if (*cs.statusp != NFS4_OK)
5901                                 cs.cont = FALSE;
5902                 } else {
5903                         /*
5904                          * This is effectively dead code since XDR code
5905                          * will have already returned BADXDR if op doesn't
5906                          * decode to legal value.  This only done for a
5907                          * day when XDR code doesn't verify v4 opcodes.
5908                          */
5909                         op = OP_ILLEGAL;
5910                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5911 
5912                         rfs4_op_illegal(argop, resop, req, &cs);
5913                         cs.cont = FALSE;
5914                 }
5915 
5916                 /*
5917                  * If not at last op, and if we are to stop, then
5918                  * compact the results array.
5919                  */
5920                 if ((i + 1) < args->array_len && !cs.cont) {
5921                         nfs_resop4 *new_res = kmem_alloc(
5922                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5923                         bcopy(resp->array,
5924                             new_res, (i+1) * sizeof (nfs_resop4));
5925                         kmem_free(resp->array,
5926                             args->array_len * sizeof (nfs_resop4));
5927 
5928                         resp->array_len =  i + 1;
5929                         resp->array = new_res;
5930                 }
5931         }
5932 
5933         rw_exit(&ne->exported_lock);
5934 
5935         /*
5936          * clear exportinfo and vnode fields from compound_state before dtrace
5937          * probe, to avoid tracing residual values for path and share path.
5938          */
5939         if (cs.vp)
5940                 VN_RELE(cs.vp);
5941         if (cs.saved_vp)
5942                 VN_RELE(cs.saved_vp);
5943         cs.exi = cs.saved_exi = NULL;
5944         cs.vp = cs.saved_vp = NULL;
5945 
5946         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5947             COMPOUND4res *, resp);
5948 
5949         if (cs.saved_fh.nfs_fh4_val)
5950                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5951 
5952         if (cs.basecr)
5953                 crfree(cs.basecr);
5954         if (cs.cr)
5955                 crfree(cs.cr);
5956         /*
5957          * done with this compound request, free the label
5958          */
5959 
5960         if (req->rq_label != NULL) {
5961                 kmem_free(req->rq_label, sizeof (bslabel_t));
5962                 req->rq_label = NULL;
5963         }
5964 }
5965 
5966 /*
5967  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5968  * XXX zero out the tag and array values. Need to investigate why the
5969  * XXX calls occur, but at least prevent the panic for now.
5970  */
5971 void
5972 rfs4_compound_free(COMPOUND4res *resp)
5973 {
5974         uint_t i;
5975 
5976         if (resp->tag.utf8string_val) {
5977                 UTF8STRING_FREE(resp->tag)
5978         }
5979 
5980         for (i = 0; i < resp->array_len; i++) {
5981                 nfs_resop4 *resop;
5982                 uint_t op;
5983 
5984                 resop = &resp->array[i];
5985                 op = (uint_t)resop->resop;
5986                 if (op < rfsv4disp_cnt) {
5987                         (*rfsv4disptab[op].dis_resfree)(resop);
5988                 }
5989         }
5990         if (resp->array != NULL) {
5991                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5992         }
5993 }
5994 
5995 /*
5996  * Process the value of the compound request rpc flags, as a bit-AND
5997  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5998  */
5999 void
6000 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6001 {
6002         int i;
6003         int flag = RPC_ALL;
6004 
6005         for (i = 0; flag && i < args->array_len; i++) {
6006                 uint_t op;
6007 
6008                 op = (uint_t)args->array[i].argop;
6009 
6010                 if (op < rfsv4disp_cnt)
6011                         flag &= rfsv4disptab[op].dis_flags;
6012                 else
6013                         flag = 0;
6014         }
6015         *flagp = flag;
6016 }
6017 
6018 nfsstat4
6019 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6020 {
6021         nfsstat4 e;
6022 
6023         rfs4_dbe_lock(cp->rc_dbe);
6024 
6025         if (cp->rc_sysidt != LM_NOSYSID) {
6026                 *sp = cp->rc_sysidt;
6027                 e = NFS4_OK;
6028 
6029         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6030                 *sp = cp->rc_sysidt;
6031                 e = NFS4_OK;
6032 
6033                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6034                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6035         } else
6036                 e = NFS4ERR_DELAY;
6037 
6038         rfs4_dbe_unlock(cp->rc_dbe);
6039         return (e);
6040 }
6041 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: emit a CE_NOTE line describing a record-lock
 * request.
 *
 *	str		prefix for the message (typically the caller's name)
 *	operation	fcntl-style command; F_GETLK, F_SETLK and
 *			F_SETLK_NBMAND are named, anything else prints as
 *			"F_UNKNOWN"
 *	flk		the lock description; l_whence is asserted to be 0,
 *			and an l_len of 0 is printed as all ones
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op, *type;

        switch (operation) {
        case F_GETLK: op = "F_GETLK";
                break;
        case F_SETLK: op = "F_SETLK";
                break;
        case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
                break;
        default: op = "F_UNKNOWN";
                break;
        }
        switch (flk->l_type) {
        case F_UNLCK: type = "F_UNLCK";
                break;
        case F_RDLCK: type = "F_RDLCK";
                break;
        case F_WRLCK: type = "F_WRLCK";
                break;
        default: type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Call lock_print(s, t, f) when 'd' is non-zero.  The do/while (0)
 * wrapper makes the expansion a single statement, so the macro cannot
 * capture a following "else" when used in an unbraced if/else.
 */
#define LOCK_PRINT(d, s, t, f)                                  \
        do {                                                    \
                if (d)                                          \
                        lock_print((s), (t), (f));              \
        } while (0)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6078 
6079 /*ARGSUSED*/
6080 static bool_t
6081 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6082 {
6083         return (TRUE);
6084 }
6085 
6086 /*
6087  * Look up the pathname using the vp in cs as the directory vnode.
6088  * cs->vp will be the vnode for the file on success
6089  */
6090 
6091 static nfsstat4
6092 rfs4_lookup(component4 *component, struct svc_req *req,
6093     struct compound_state *cs)
6094 {
6095         char *nm;
6096         uint32_t len;
6097         nfsstat4 status;
6098         struct sockaddr *ca;
6099         char *name;
6100 
6101         if (cs->vp == NULL) {
6102                 return (NFS4ERR_NOFILEHANDLE);
6103         }
6104         if (cs->vp->v_type != VDIR) {
6105                 return (NFS4ERR_NOTDIR);
6106         }
6107 
6108         status = utf8_dir_verify(component);
6109         if (status != NFS4_OK)
6110                 return (status);
6111 
6112         nm = utf8_to_fn(component, &len, NULL);
6113         if (nm == NULL) {
6114                 return (NFS4ERR_INVAL);
6115         }
6116 
6117         if (len > MAXNAMELEN) {
6118                 kmem_free(nm, len);
6119                 return (NFS4ERR_NAMETOOLONG);
6120         }
6121 
6122         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6123         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6124             MAXPATHLEN + 1);
6125 
6126         if (name == NULL) {
6127                 kmem_free(nm, len);
6128                 return (NFS4ERR_INVAL);
6129         }
6130 
6131         status = do_rfs4_op_lookup(name, req, cs);
6132 
6133         if (name != nm)
6134                 kmem_free(name, MAXPATHLEN + 1);
6135 
6136         kmem_free(nm, len);
6137 
6138         return (status);
6139 }
6140 
6141 static nfsstat4
6142 rfs4_lookupfile(component4 *component, struct svc_req *req,
6143     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6144 {
6145         nfsstat4 status;
6146         vnode_t *dvp = cs->vp;
6147         vattr_t bva, ava, fva;
6148         int error;
6149 
6150         /* Get "before" change value */
6151         bva.va_mask = AT_CTIME|AT_SEQ;
6152         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6153         if (error)
6154                 return (puterrno4(error));
6155 
6156         /* rfs4_lookup may VN_RELE directory */
6157         VN_HOLD(dvp);
6158 
6159         status = rfs4_lookup(component, req, cs);
6160         if (status != NFS4_OK) {
6161                 VN_RELE(dvp);
6162                 return (status);
6163         }
6164 
6165         /*
6166          * Get "after" change value, if it fails, simply return the
6167          * before value.
6168          */
6169         ava.va_mask = AT_CTIME|AT_SEQ;
6170         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6171                 ava.va_ctime = bva.va_ctime;
6172                 ava.va_seq = 0;
6173         }
6174         VN_RELE(dvp);
6175 
6176         /*
6177          * Validate the file is a file
6178          */
6179         fva.va_mask = AT_TYPE|AT_MODE;
6180         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6181         if (error)
6182                 return (puterrno4(error));
6183 
6184         if (fva.va_type != VREG) {
6185                 if (fva.va_type == VDIR)
6186                         return (NFS4ERR_ISDIR);
6187                 if (fva.va_type == VLNK)
6188                         return (NFS4ERR_SYMLINK);
6189                 return (NFS4ERR_INVAL);
6190         }
6191 
6192         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6193         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6194 
6195         /*
6196          * It is undefined if VOP_LOOKUP will change va_seq, so
6197          * cinfo.atomic = TRUE only if we have
6198          * non-zero va_seq's, and they have not changed.
6199          */
6200         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6201                 cinfo->atomic = TRUE;
6202         else
6203                 cinfo->atomic = FALSE;
6204 
6205         /* Check for mandatory locking */
6206         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6207         return (check_open_access(access, cs, req));
6208 }
6209 
6210 static nfsstat4
6211 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6212     cred_t *cr, vnode_t **vpp, bool_t *created)
6213 {
6214         int error;
6215         nfsstat4 status = NFS4_OK;
6216         vattr_t va;
6217 
6218 tryagain:
6219 
6220         /*
6221          * The file open mode used is VWRITE.  If the client needs
6222          * some other semantic, then it should do the access checking
6223          * itself.  It would have been nice to have the file open mode
6224          * passed as part of the arguments.
6225          */
6226 
6227         *created = TRUE;
6228         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6229 
6230         if (error) {
6231                 *created = FALSE;
6232 
6233                 /*
6234                  * If we got something other than file already exists
6235                  * then just return this error.  Otherwise, we got
6236                  * EEXIST.  If we were doing a GUARDED create, then
6237                  * just return this error.  Otherwise, we need to
6238                  * make sure that this wasn't a duplicate of an
6239                  * exclusive create request.
6240                  *
6241                  * The assumption is made that a non-exclusive create
6242                  * request will never return EEXIST.
6243                  */
6244 
6245                 if (error != EEXIST || mode == GUARDED4) {
6246                         status = puterrno4(error);
6247                         return (status);
6248                 }
6249                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6250                     NULL, NULL, NULL);
6251 
6252                 if (error) {
6253                         /*
6254                          * We couldn't find the file that we thought that
6255                          * we just created.  So, we'll just try creating
6256                          * it again.
6257                          */
6258                         if (error == ENOENT)
6259                                 goto tryagain;
6260 
6261                         status = puterrno4(error);
6262                         return (status);
6263                 }
6264 
6265                 if (mode == UNCHECKED4) {
6266                         /* existing object must be regular file */
6267                         if ((*vpp)->v_type != VREG) {
6268                                 if ((*vpp)->v_type == VDIR)
6269                                         status = NFS4ERR_ISDIR;
6270                                 else if ((*vpp)->v_type == VLNK)
6271                                         status = NFS4ERR_SYMLINK;
6272                                 else
6273                                         status = NFS4ERR_INVAL;
6274                                 VN_RELE(*vpp);
6275                                 return (status);
6276                         }
6277 
6278                         return (NFS4_OK);
6279                 }
6280 
6281                 /* Check for duplicate request */
6282                 va.va_mask = AT_MTIME;
6283                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6284                 if (!error) {
6285                         /* We found the file */
6286                         const timestruc_t *mtime = &vap->va_mtime;
6287 
6288                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6289                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6290                                 /* but its not our creation */
6291                                 VN_RELE(*vpp);
6292                                 return (NFS4ERR_EXIST);
6293                         }
6294                         *created = TRUE; /* retrans of create == created */
6295                         return (NFS4_OK);
6296                 }
6297                 VN_RELE(*vpp);
6298                 return (NFS4ERR_EXIST);
6299         }
6300 
6301         return (NFS4_OK);
6302 }
6303 
6304 static nfsstat4
6305 check_open_access(uint32_t access, struct compound_state *cs,
6306     struct svc_req *req)
6307 {
6308         int error;
6309         vnode_t *vp;
6310         bool_t readonly;
6311         cred_t *cr = cs->cr;
6312 
6313         /* For now we don't allow mandatory locking as per V2/V3 */
6314         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6315                 return (NFS4ERR_ACCESS);
6316         }
6317 
6318         vp = cs->vp;
6319         ASSERT(cr != NULL && vp->v_type == VREG);
6320 
6321         /*
6322          * If the file system is exported read only and we are trying
6323          * to open for write, then return NFS4ERR_ROFS
6324          */
6325 
6326         readonly = rdonly4(req, cs);
6327 
6328         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6329                 return (NFS4ERR_ROFS);
6330 
6331         if (access & OPEN4_SHARE_ACCESS_READ) {
6332                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6333                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6334                         return (NFS4ERR_ACCESS);
6335                 }
6336         }
6337 
6338         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6339                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6340                 if (error)
6341                         return (NFS4ERR_ACCESS);
6342         }
6343 
6344         return (NFS4_OK);
6345 }
6346 
6347 static nfsstat4
6348 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6349     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6350 {
6351         struct nfs4_svgetit_arg sarg;
6352         struct nfs4_ntov_table ntov;
6353 
6354         bool_t ntov_table_init = FALSE;
6355         struct statvfs64 sb;
6356         nfsstat4 status;
6357         vnode_t *vp;
6358         vattr_t bva, ava, iva, cva, *vap;
6359         vnode_t *dvp;
6360         timespec32_t *mtime;
6361         char *nm = NULL;
6362         uint_t buflen;
6363         bool_t created;
6364         bool_t setsize = FALSE;
6365         len_t reqsize;
6366         int error;
6367         bool_t trunc;
6368         caller_context_t ct;
6369         component4 *component;
6370         bslabel_t *clabel;
6371         struct sockaddr *ca;
6372         char *name = NULL;
6373 
6374         sarg.sbp = &sb;
6375         sarg.is_referral = B_FALSE;
6376 
6377         dvp = cs->vp;
6378 
6379         /* Check if the file system is read only */
6380         if (rdonly4(req, cs))
6381                 return (NFS4ERR_ROFS);
6382 
6383         /* check the label of including directory */
6384         if (is_system_labeled()) {
6385                 ASSERT(req->rq_label != NULL);
6386                 clabel = req->rq_label;
6387                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6388                     "got client label from request(1)",
6389                     struct svc_req *, req);
6390                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6391                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6392                             cs->exi)) {
6393                                 return (NFS4ERR_ACCESS);
6394                         }
6395                 }
6396         }
6397 
6398         /*
6399          * Get the last component of path name in nm. cs will reference
6400          * the including directory on success.
6401          */
6402         component = &args->open_claim4_u.file;
6403         status = utf8_dir_verify(component);
6404         if (status != NFS4_OK)
6405                 return (status);
6406 
6407         nm = utf8_to_fn(component, &buflen, NULL);
6408 
6409         if (nm == NULL)
6410                 return (NFS4ERR_RESOURCE);
6411 
6412         if (buflen > MAXNAMELEN) {
6413                 kmem_free(nm, buflen);
6414                 return (NFS4ERR_NAMETOOLONG);
6415         }
6416 
6417         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6418         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6419         if (error) {
6420                 kmem_free(nm, buflen);
6421                 return (puterrno4(error));
6422         }
6423 
6424         if (bva.va_type != VDIR) {
6425                 kmem_free(nm, buflen);
6426                 return (NFS4ERR_NOTDIR);
6427         }
6428 
6429         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6430 
6431         switch (args->mode) {
6432         case GUARDED4:
6433                 /*FALLTHROUGH*/
6434         case UNCHECKED4:
6435                 nfs4_ntov_table_init(&ntov);
6436                 ntov_table_init = TRUE;
6437 
6438                 *attrset = 0;
6439                 status = do_rfs4_set_attrs(attrset,
6440                     &args->createhow4_u.createattrs,
6441                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6442 
6443                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6444                     sarg.vap->va_type != VREG) {
6445                         if (sarg.vap->va_type == VDIR)
6446                                 status = NFS4ERR_ISDIR;
6447                         else if (sarg.vap->va_type == VLNK)
6448                                 status = NFS4ERR_SYMLINK;
6449                         else
6450                                 status = NFS4ERR_INVAL;
6451                 }
6452 
6453                 if (status != NFS4_OK) {
6454                         kmem_free(nm, buflen);
6455                         nfs4_ntov_table_free(&ntov, &sarg);
6456                         *attrset = 0;
6457                         return (status);
6458                 }
6459 
6460                 vap = sarg.vap;
6461                 vap->va_type = VREG;
6462                 vap->va_mask |= AT_TYPE;
6463 
6464                 if ((vap->va_mask & AT_MODE) == 0) {
6465                         vap->va_mask |= AT_MODE;
6466                         vap->va_mode = (mode_t)0600;
6467                 }
6468 
6469                 if (vap->va_mask & AT_SIZE) {
6470 
6471                         /* Disallow create with a non-zero size */
6472 
6473                         if ((reqsize = sarg.vap->va_size) != 0) {
6474                                 kmem_free(nm, buflen);
6475                                 nfs4_ntov_table_free(&ntov, &sarg);
6476                                 *attrset = 0;
6477                                 return (NFS4ERR_INVAL);
6478                         }
6479                         setsize = TRUE;
6480                 }
6481                 break;
6482 
6483         case EXCLUSIVE4:
6484                 /* prohibit EXCL create of named attributes */
6485                 if (dvp->v_flag & V_XATTRDIR) {
6486                         kmem_free(nm, buflen);
6487                         *attrset = 0;
6488                         return (NFS4ERR_INVAL);
6489                 }
6490 
6491                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6492                 cva.va_type = VREG;
6493                 /*
6494                  * Ensure no time overflows. Assumes underlying
6495                  * filesystem supports at least 32 bits.
6496                  * Truncate nsec to usec resolution to allow valid
6497                  * compares even if the underlying filesystem truncates.
6498                  */
6499                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6500                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6501                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6502                 cva.va_mode = (mode_t)0;
6503                 vap = &cva;
6504 
6505                 /*
6506                  * For EXCL create, attrset is set to the server attr
6507                  * used to cache the client's verifier.
6508                  */
6509                 *attrset = FATTR4_TIME_MODIFY_MASK;
6510                 break;
6511         }
6512 
6513         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6514         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6515             MAXPATHLEN  + 1);
6516 
6517         if (name == NULL) {
6518                 kmem_free(nm, buflen);
6519                 return (NFS4ERR_SERVERFAULT);
6520         }
6521 
6522         status = create_vnode(dvp, name, vap, args->mode,
6523             cs->cr, &vp, &created);
6524         if (nm != name)
6525                 kmem_free(name, MAXPATHLEN + 1);
6526         kmem_free(nm, buflen);
6527 
6528         if (status != NFS4_OK) {
6529                 if (ntov_table_init)
6530                         nfs4_ntov_table_free(&ntov, &sarg);
6531                 *attrset = 0;
6532                 return (status);
6533         }
6534 
6535         trunc = (setsize && !created);
6536 
6537         if (args->mode != EXCLUSIVE4) {
6538                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6539 
6540                 /*
6541                  * True verification that object was created with correct
6542                  * attrs is impossible.  The attrs could have been changed
6543                  * immediately after object creation.  If attributes did
6544                  * not verify, the only recourse for the server is to
6545                  * destroy the object.  Maybe if some attrs (like gid)
6546                  * are set incorrectly, the object should be destroyed;
6547                  * however, seems bad as a default policy.  Do we really
6548                  * want to destroy an object over one of the times not
6549                  * verifying correctly?  For these reasons, the server
6550                  * currently sets bits in attrset for createattrs
6551                  * that were set; however, no verification is done.
6552                  *
6553                  * vmask_to_nmask accounts for vattr bits set on create
6554                  *      [do_rfs4_set_attrs() only sets resp bits for
6555                  *       non-vattr/vfs bits.]
6556                  * Mask off any bits we set by default so as not to return
6557                  * more attrset bits than were requested in createattrs
6558                  */
6559                 if (created) {
6560                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6561                         *attrset &= createmask;
6562                 } else {
6563                         /*
6564                          * We did not create the vnode (we tried but it
6565                          * already existed).  In this case, the only createattr
6566                          * that the spec allows the server to set is size,
6567                          * and even then, it can only be set if it is 0.
6568                          */
6569                         *attrset = 0;
6570                         if (trunc)
6571                                 *attrset = FATTR4_SIZE_MASK;
6572                 }
6573         }
6574         if (ntov_table_init)
6575                 nfs4_ntov_table_free(&ntov, &sarg);
6576 
6577         /*
6578          * Get the initial "after" sequence number, if it fails,
6579          * set to zero, time to before.
6580          */
6581         iva.va_mask = AT_CTIME|AT_SEQ;
6582         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6583                 iva.va_seq = 0;
6584                 iva.va_ctime = bva.va_ctime;
6585         }
6586 
6587         /*
6588          * create_vnode attempts to create the file exclusive,
6589          * if it already exists the VOP_CREATE will fail and
6590          * may not increase va_seq. It is atomic if
6591          * we haven't changed the directory, but if it has changed
6592          * we don't know what changed it.
6593          */
6594         if (!created) {
6595                 if (bva.va_seq && iva.va_seq &&
6596                     bva.va_seq == iva.va_seq)
6597                         cinfo->atomic = TRUE;
6598                 else
6599                         cinfo->atomic = FALSE;
6600                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6601         } else {
6602                 /*
6603                  * The entry was created, we need to sync the
6604                  * directory metadata.
6605                  */
6606                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6607 
6608                 /*
6609                  * Get "after" change value, if it fails, simply return the
6610                  * before value.
6611                  */
6612                 ava.va_mask = AT_CTIME|AT_SEQ;
6613                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6614                         ava.va_ctime = bva.va_ctime;
6615                         ava.va_seq = 0;
6616                 }
6617 
6618                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6619 
6620                 /*
6621                  * The cinfo->atomic = TRUE only if we have
6622                  * non-zero va_seq's, and it has incremented by exactly one
6623                  * during the create_vnode and it didn't
6624                  * change during the VOP_FSYNC.
6625                  */
6626                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6627                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6628                         cinfo->atomic = TRUE;
6629                 else
6630                         cinfo->atomic = FALSE;
6631         }
6632 
6633         /* Check for mandatory locking and that the size gets set. */
6634         cva.va_mask = AT_MODE;
6635         if (setsize)
6636                 cva.va_mask |= AT_SIZE;
6637 
6638         /* Assume the worst */
6639         cs->mandlock = TRUE;
6640 
6641         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6642                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6643 
6644                 /*
6645                  * Truncate the file if necessary; this would be
6646                  * the case for create over an existing file.
6647                  */
6648 
6649                 if (trunc) {
6650                         int in_crit = 0;
6651                         rfs4_file_t *fp;
6652                         nfs4_srv_t *nsrv4;
6653                         bool_t create = FALSE;
6654 
6655                         /*
6656                          * We are writing over an existing file.
6657                          * Check to see if we need to recall a delegation.
6658                          */
6659                         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6660                         rfs4_hold_deleg_policy(nsrv4);
6661                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6662                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6663                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6664                                         rfs4_file_rele(fp);
6665                                         rfs4_rele_deleg_policy(nsrv4);
6666                                         VN_RELE(vp);
6667                                         *attrset = 0;
6668                                         return (NFS4ERR_DELAY);
6669                                 }
6670                                 rfs4_file_rele(fp);
6671                         }
6672                         rfs4_rele_deleg_policy(nsrv4);
6673 
6674                         if (nbl_need_check(vp)) {
6675                                 in_crit = 1;
6676 
6677                                 ASSERT(reqsize == 0);
6678 
6679                                 nbl_start_crit(vp, RW_READER);
6680                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6681                                     cva.va_size, 0, NULL)) {
6682                                         in_crit = 0;
6683                                         nbl_end_crit(vp);
6684                                         VN_RELE(vp);
6685                                         *attrset = 0;
6686                                         return (NFS4ERR_ACCESS);
6687                                 }
6688                         }
6689                         ct.cc_sysid = 0;
6690                         ct.cc_pid = 0;
6691                         ct.cc_caller_id = nfs4_srv_caller_id;
6692                         ct.cc_flags = CC_DONTBLOCK;
6693 
6694                         cva.va_mask = AT_SIZE;
6695                         cva.va_size = reqsize;
6696                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6697                         if (in_crit)
6698                                 nbl_end_crit(vp);
6699                 }
6700         }
6701 
6702         error = makefh4(&cs->fh, vp, cs->exi);
6703 
6704         /*
6705          * Force modified data and metadata out to stable storage.
6706          */
6707         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6708 
6709         if (error) {
6710                 VN_RELE(vp);
6711                 *attrset = 0;
6712                 return (puterrno4(error));
6713         }
6714 
6715         /* if parent dir is attrdir, set namedattr fh flag */
6716         if (dvp->v_flag & V_XATTRDIR)
6717                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6718 
6719         if (cs->vp)
6720                 VN_RELE(cs->vp);
6721 
6722         cs->vp = vp;
6723 
6724         /*
6725          * if we did not create the file, we will need to check
6726          * the access bits on the file
6727          */
6728 
6729         if (!created) {
6730                 if (setsize)
6731                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6732                 status = check_open_access(args->share_access, cs, req);
6733                 if (status != NFS4_OK)
6734                         *attrset = 0;
6735         }
6736         return (status);
6737 }
6738 
     /*
      * Common back end for the OPEN claim handlers in this file
      * (rfs4_do_opennull(), rfs4_do_openprev(), rfs4_do_opendelcur()):
      * record an open, or an upgrade of an existing open, of cs->vp on
      * behalf of open owner oo.
      *
      *   cs         compound state; cs->vp and cs->fh name the file and
      *              cs->cr is the credential handed to VOP_OPEN().  On
      *              success cs->deleg is set to whether the file's
      *              delegation type is OPEN_DELEGATE_WRITE.
      *   req        unused.
      *   oo         open owner performing the open.
      *   deleg      delegation request passed to rfs4_grant_delegation().
      *   access     OPEN4_SHARE_ACCESS_* bits requested.
      *   deny       OPEN4_SHARE_DENY_* bits requested.
      *   resp       receives status, stateid and, when a delegation state
      *              is returned, the delegation.
      *   deleg_cur  non-zero when called for CLAIM_DELEGATE_CUR: VOP_OPEN()
      *              is skipped and only vn_open_upgrade() is applied.
      *
      * resp->status on return:
      *   NFS4_OK              open state recorded
      *   NFS4ERR_RESOURCE     rfs4_findfile_withlock() or
      *                        rfs4_findstate_by_owner_file() returned NULL
      *   NFS4ERR_OLD_STATEID  the state was closed while its lock was
      *                        dropped for a delegation recall
      *   NFS4ERR_DELAY        the delegation conflict is still present after
      *                        the recall delay, or VOP_OPEN() failed with
      *                        EAGAIN and CC_WOULDBLOCK set
      *   NFS4ERR_SERVERFAULT  any other VOP_OPEN() failure
      * or the failure value returned by rfs4_client_sysid() / rfs4_share().
      */
6739 /*ARGSUSED*/
6740 static void
6741 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6742     rfs4_openowner_t *oo, delegreq_t deleg,
6743     uint32_t access, uint32_t deny,
6744     OPEN4res *resp, int deleg_cur)
6745 {
6746         /* XXX Currently not using req  */
6747         rfs4_state_t *sp;
6748         rfs4_file_t *fp;
6749         bool_t screate = TRUE;
6750         bool_t fcreate = TRUE;
6751         uint32_t open_a, share_a;
6752         uint32_t open_d, share_d;
6753         rfs4_deleg_state_t *dsp;
6754         sysid_t sysid;
6755         nfsstat4 status;
6756         caller_context_t ct;
6757         int fflags = 0;
6758         int recall = 0;
6759         int err;
6760         int first_open;
6761 
6762         /* get the file struct and hold a lock on it during initial open */
6763         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6764         if (fp == NULL) {
6765                 resp->status = NFS4ERR_RESOURCE;
6766                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6767                 return;
6768         }
6769 
6770         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6771         if (sp == NULL) {
6772                 resp->status = NFS4ERR_RESOURCE;
6773                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6774                 /* No need to keep any reference */
                     /*
                      * NOTE(review): this is the only path in this function
                      * that drops rf_file_rwlock explicitly; presumably a
                      * successful rfs4_findstate_by_owner_file() releases
                      * it -- confirm.
                      */
6775                 rw_exit(&fp->rf_file_rwlock);
6776                 rfs4_file_rele(fp);
6777                 return;
6778         }
6779 
6780         /* try to get the sysid before continuing */
6781         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6782                 resp->status = status;
6783                 rfs4_file_rele(fp);
6784                 /* Not a fully formed open; "close" it */
6785                 if (screate == TRUE)
6786                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6787                 rfs4_state_rele(sp);
6788                 return;
6789         }
6790 
6791         /* Calculate the fflags for this OPEN. */
6792         if (access & OPEN4_SHARE_ACCESS_READ)
6793                 fflags |= FREAD;
6794         if (access & OPEN4_SHARE_ACCESS_WRITE)
6795                 fflags |= FWRITE;
6796 
6797         rfs4_dbe_lock(sp->rs_dbe);
6798 
6799         /*
6800          * Calculate the new deny and access mode that this open is adding to
6801          * the file for this open owner;
6802          */
6803         open_d = (deny & ~sp->rs_open_deny);
6804         open_a = (access & ~sp->rs_open_access);
6805 
6806         /*
6807          * Calculate the new share access and share deny modes that this open
6808          * is adding to the file for this open owner;
6809          */
6810         share_a = (access & ~sp->rs_share_access);
6811         share_d = (deny & ~sp->rs_share_deny);
6812 
             /* Sampled now, before rs_open_access is updated further down. */
6813         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6814 
6815         /*
6816          * Check to see the client has already sent an open for this
6817          * open owner on this file with the same share/deny modes.
6818          * If so, we don't need to check for a conflict and we don't
6819          * need to add another shrlock.  If not, then we need to
6820          * check for conflicts in deny and access before checking for
6821          * conflicts in delegation.  We don't want to recall a
6822          * delegation based on an open that will eventually fail based
6823          * on shares modes.
6824          */
6825 
6826         if (share_a || share_d) {
6827                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6828                         rfs4_dbe_unlock(sp->rs_dbe);
6829                         resp->status = err;
6830 
6831                         rfs4_file_rele(fp);
6832                         /* Not a fully formed open; "close" it */
6833                         if (screate == TRUE)
6834                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6835                         rfs4_state_rele(sp);
6836                         return;
6837                 }
6838         }
6839 
             /* Lock order used throughout: state entry first, then file entry. */
6840         rfs4_dbe_lock(fp->rf_dbe);
6841 
6842         /*
6843          * Check to see if this file is delegated and if so, if a
6844          * recall needs to be done.
6845          */
6846         if (rfs4_check_recall(sp, access)) {
                     /*
                      * Both dbe locks are dropped across the recall and the
                      * delay; the state entry is re-locked first so that
                      * rs_closed can be examined before touching the file.
                      */
6847                 rfs4_dbe_unlock(fp->rf_dbe);
6848                 rfs4_dbe_unlock(sp->rs_dbe);
6849                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6850                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6851                 rfs4_dbe_lock(sp->rs_dbe);
6852 
6853                 /* if state closed while lock was dropped */
6854                 if (sp->rs_closed) {
6855                         if (share_a || share_d)
6856                                 (void) rfs4_unshare(sp);
6857                         rfs4_dbe_unlock(sp->rs_dbe);
6858                         rfs4_file_rele(fp);
6859                         /* Not a fully formed open; "close" it */
6860                         if (screate == TRUE)
6861                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6862                         rfs4_state_rele(sp);
6863                         resp->status = NFS4ERR_OLD_STATEID;
6864                         return;
6865                 }
6866 
6867                 rfs4_dbe_lock(fp->rf_dbe);
6868                 /* Let's see if the delegation was returned */
6869                 if (rfs4_check_recall(sp, access)) {
6870                         rfs4_dbe_unlock(fp->rf_dbe);
6871                         if (share_a || share_d)
6872                                 (void) rfs4_unshare(sp);
6873                         rfs4_dbe_unlock(sp->rs_dbe);
6874                         rfs4_file_rele(fp);
6875                         rfs4_update_lease(sp->rs_owner->ro_client);
6876 
6877                         /* Not a fully formed open; "close" it */
6878                         if (screate == TRUE)
6879                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6880                         rfs4_state_rele(sp);
6881                         resp->status = NFS4ERR_DELAY;
6882                         return;
6883                 }
6884         }
6885         /*
6886          * the share check passed and any delegation conflict has been
6887          * taken care of, now call vop_open.
6888          * if this is the first open then call vop_open with fflags.
6889          * if not, call vn_open_upgrade with just the upgrade flags.
6890          *
6891          * if the file has been opened already, it will have the current
6892          * access mode in the state struct.  if it has no share access, then
6893          * this is a new open.
6894          *
6895          * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6896          * call VOP_OPEN(), just do the open upgrade.
6897          */
6898         if (first_open && !deleg_cur) {
6899                 ct.cc_sysid = sysid;
6900                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6901                 ct.cc_caller_id = nfs4_srv_caller_id;
6902                 ct.cc_flags = CC_DONTBLOCK;
6903                 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6904                 if (err) {
6905                         rfs4_dbe_unlock(fp->rf_dbe);
6906                         if (share_a || share_d)
6907                                 (void) rfs4_unshare(sp);
6908                         rfs4_dbe_unlock(sp->rs_dbe);
6909                         rfs4_file_rele(fp);
6910 
6911                         /* Not a fully formed open; "close" it */
6912                         if (screate == TRUE)
6913                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6914                         rfs4_state_rele(sp);
6915                         /* check if a monitor detected a delegation conflict */
6916                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6917                                 resp->status = NFS4ERR_DELAY;
6918                         else
6919                                 resp->status = NFS4ERR_SERVERFAULT;
6920                         return;
6921                 }
6922         } else { /* open upgrade */
6923                 /*
6924                  * calculate the fflags for the new mode that is being added
6925                  * by this upgrade.
6926                  */
6927                 fflags = 0;
6928                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6929                         fflags |= FREAD;
6930                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6931                         fflags |= FWRITE;
6932                 vn_open_upgrade(cs->vp, fflags);
6933         }
6934         sp->rs_open_access |= access;
6935         sp->rs_open_deny |= deny;
6936 
             /*
              * open_d and open_a hold only the bits this state did not already
              * have (computed above), so the per-file counters are bumped only
              * for modes newly added by this open.
              */
6937         if (open_d & OPEN4_SHARE_DENY_READ)
6938                 fp->rf_deny_read++;
6939         if (open_d & OPEN4_SHARE_DENY_WRITE)
6940                 fp->rf_deny_write++;
6941         fp->rf_share_deny |= deny;
6942 
6943         if (open_a & OPEN4_SHARE_ACCESS_READ)
6944                 fp->rf_access_read++;
6945         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6946                 fp->rf_access_write++;
6947         fp->rf_share_access |= access;
6948 
6949         /*
6950          * Check for delegation here. if the deleg argument is not
6951          * DELEG_ANY, then this is a reclaim from a client and
6952          * we must honor the delegation requested. If necessary we can
6953          * set the recall flag.
6954          */
6955 
6956         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6957 
6958         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6959 
6960         next_stateid(&sp->rs_stateid);
6961 
6962         resp->stateid = sp->rs_stateid.stateid;
6963 
6964         rfs4_dbe_unlock(fp->rf_dbe);
6965         rfs4_dbe_unlock(sp->rs_dbe);
6966 
6967         if (dsp) {
6968                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6969                 rfs4_deleg_state_rele(dsp);
6970         }
6971 
6972         rfs4_file_rele(fp);
6973         rfs4_state_rele(sp);
6974 
6975         resp->status = NFS4_OK;
6976 }
6977 
     /*
      * OPEN handler for a file named by args->open_claim4_u.file.
      * NOTE(review): presumably dispatched for CLAIM_NULL; the caller is not
      * visible here -- confirm.
      *
      * OPEN4_NOCREATE resolves the name with rfs4_lookupfile(); any other
      * opentype goes through rfs4_createfile(), with delegation grants
      * disabled around an EXCLUSIVE4 create.  When that step returns NFS4_OK,
      * rfs4_do_open() records the open, requesting DELEG_NONE while
      * oo->ro_need_confirm is set and DELEG_ANY otherwise.
      *
      * If the lookup/create step fails, its status is stored in both
      * resp->status and *cs->statusp.  If rfs4_do_open() fails, resp->attrset
      * is cleared so that no attribute bits accompany the error.
      */
6978 /*ARGSUSED*/
6979 static void
6980 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6981     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6982 {
6983         change_info4 *cinfo = &resp->cinfo;
6984         bitmap4 *attrset = &resp->attrset;
6985 
6986         if (args->opentype == OPEN4_NOCREATE)
6987                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6988                     req, cs, args->share_access, cinfo);
6989         else {
6990                 /* inhibit delegation grants during exclusive create */
6991 
6992                 if (args->mode == EXCLUSIVE4)
6993                         rfs4_disable_delegation();
6994 
6995                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6996                     oo->ro_client->rc_clientid);
6997         }
6998 
6999         if (resp->status == NFS4_OK) {
7000 
7001                 /* cs->vp cs->fh now reference the desired file */
7002 
7003                 rfs4_do_open(cs, req, oo,
7004                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7005                     args->share_access, args->share_deny, resp, 0);
7006 
7007                 /*
7008                  * If rfs4_createfile set attrset, we must
7009                  * clear this attrset before the response is copied.
7010                  */
7011                 if (resp->status != NFS4_OK && resp->attrset) {
7012                         resp->attrset = 0;
7013                 }
7014         }
7015         else
7016                 *cs->statusp = resp->status;
7017 
             /*
              * NOTE(review): the disable above runs only on the create path,
              * while this enable is keyed on args->mode alone; confirm that
              * mode can never be EXCLUSIVE4 for an OPEN4_NOCREATE request.
              */
7018         if (args->mode == EXCLUSIVE4)
7019                 rfs4_enable_delegation();
7020 }
7021 
     /*
      * OPEN handler for a reclaim of an earlier open of cs->vp (the comment
      * further down refers to this claim type as CLAIM_PREVIOUS).
      *
      * The current vnode must be a regular file; otherwise resp->status is
      * NFS4ERR_ISDIR, NFS4ERR_SYMLINK or NFS4ERR_INVAL according to its type.
      * The file's mode and owner are fetched to set cs->mandlock and to
      * decide whether check_open_access() is needed: it is skipped when the
      * caller's uid equals the file's owner.  A VOP_GETATTR() failure is
      * mapped through puterrno4().
      *
      * On success the change info is zeroed and rfs4_do_open() is called with
      * the delegation request derived from
      * args->open_claim4_u.delegate_type; resp->status is whatever it sets.
      */
7022 /*ARGSUSED*/
7023 static void
7024 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7025     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7026 {
7027         change_info4 *cinfo = &resp->cinfo;
7028         vattr_t va;
7029         vtype_t v_type = cs->vp->v_type;
7030         int error = 0;
7031 
7032         /* Verify that we have a regular file */
7033         if (v_type != VREG) {
7034                 if (v_type == VDIR)
7035                         resp->status = NFS4ERR_ISDIR;
7036                 else if (v_type == VLNK)
7037                         resp->status = NFS4ERR_SYMLINK;
7038                 else
7039                         resp->status = NFS4ERR_INVAL;
7040                 return;
7041         }
7042 
7043         va.va_mask = AT_MODE|AT_UID;
7044         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7045         if (error) {
7046                 resp->status = puterrno4(error);
7047                 return;
7048         }
7049 
7050         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7051 
7052         /*
7053          * Check if we have access to the file, Note that the file
7054          * could have originally been open UNCHECKED or GUARDED
7055          * with mode bits that will now fail, but there is nothing
7056          * we can really do about that except in the case that the
7057          * owner of the file is the one requesting the open.
7058          */
7059         if (crgetuid(cs->cr) != va.va_uid) {
7060                 resp->status = check_open_access(args->share_access, cs, req);
7061                 if (resp->status != NFS4_OK) {
7062                         return;
7063                 }
7064         }
7065 
7066         /*
7067          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7068          */
7069         cinfo->before = 0;
7070         cinfo->after = 0;
7071         cinfo->atomic = FALSE;
7072 
7073         rfs4_do_open(cs, req, oo,
7074             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7075             args->share_access, args->share_deny, resp, 0);
7076 }
7077 
     /*
      * OPEN handler for a claim that presents a currently held delegation
      * stateid (args->open_claim4_u.delegate_cur_info.delegate_stateid).
      *
      * The delegation state is looked up from the stateid; a lookup failure
      * is returned in resp->status.  The compound's current vnode is then
      * replaced by the delegated file's vnode and cs->fh is rebuilt from it;
      * a makefh4() failure is mapped through puterrno4() into both
      * resp->status and *cs->statusp.  Finally rfs4_do_open() is called with
      * DELEG_NONE and deleg_cur set, so it upgrades the open without calling
      * VOP_OPEN().
      *
      * The reference on the delegation state is released on every path after
      * a successful lookup.
      */
7078 static void
7079 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7080     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7081 {
7082         int error;
7083         nfsstat4 status;
7084         stateid4 stateid =
7085             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7086         rfs4_deleg_state_t *dsp;
7087 
7088         /*
7089          * Find the state info from the stateid and confirm that the
7090          * file is delegated.  If the state openowner is the same as
7091          * the supplied openowner we're done. If not, get the file
7092          * info from the found state info. Use that file info to
7093          * create the state for this lock owner. Note solaris doesn't
7094          * really need the pathname to find the file. We may want to
7095          * lookup the pathname and make sure that the vp exists and
7096          * matches the vp in the file structure. However it is
7097          * possible that the pathname no longer exists (local process
7098          * unlinks the file), so this may not be that useful.
7099          */
7100 
7101         status = rfs4_get_deleg_state(&stateid, &dsp);
7102         if (status != NFS4_OK) {
7103                 resp->status = status;
7104                 return;
7105         }
7106 
7107         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7108 
7109         /*
7110          * New lock owner, create state. Since this was probably called
7111          * in response to a CB_RECALL we set deleg to DELEG_NONE
7112          */
7113 
7114         ASSERT(cs->vp != NULL);
             /*
              * Swap the compound's current vnode for the delegated file's
              * vnode: drop the hold on the old one, take a hold on the new.
              */
7115         VN_RELE(cs->vp);
7116         VN_HOLD(dsp->rds_finfo->rf_vp);
7117         cs->vp = dsp->rds_finfo->rf_vp;
7118 
7119         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7120                 rfs4_deleg_state_rele(dsp);
7121                 *cs->statusp = resp->status = puterrno4(error);
7122                 return;
7123         }
7124 
7125         /* Mark progress for delegation returns */
7126         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7127         rfs4_deleg_state_rele(dsp);
7128         rfs4_do_open(cs, req, oo, DELEG_NONE,
7129             args->share_access, args->share_deny, resp, 1);
7130 }
7131 
     /*
      * OPEN handler that re-presents a delegation for the file named by
      * args->open_claim4_u.file_delegate_prev.
      *
      * The name is resolved with rfs4_lookupfile(); its failure status is
      * returned unchanged.  The file and owner/file state entries are then
      * looked up with the create flag passed as FALSE (NOTE(review):
      * presumably lookup-only -- confirm against the helpers).  The request
      * is rejected with NFS4ERR_SERVERFAULT when the state entry or the
      * delegation state is missing, when the requested share access/deny
      * differ from the state's, or when the file is not delegated;
      * NFS4ERR_RESOURCE is returned when the file entry is missing.
      *
      * On success the state's stateid is advanced and resp->delegation is
      * filled in from the delegation state with recall set to FALSE.
      * resp->status is left as the NFS4_OK produced by rfs4_lookupfile().
      */
7132 /*ARGSUSED*/
7133 static void
7134 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7135     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7136 {
7137         /*
7138          * Lookup the pathname, it must already exist since this file
7139          * was delegated.
7140          *
7141          * Find the file and state info for this vp and open owner pair.
7142          *      check that they are in fact delegated.
7143          *      check that the state access and deny modes are the same.
7144          *
7145          * Return the delegation, possibly setting the recall flag.
7146          */
7147         rfs4_file_t *fp;
7148         rfs4_state_t *sp;
7149         bool_t create = FALSE;
7150         bool_t dcreate = FALSE;
7151         rfs4_deleg_state_t *dsp;
7152         nfsace4 *ace;
7153 
7154         /* Note we ignore oflags */
7155         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7156             req, cs, args->share_access, &resp->cinfo);
7157 
7158         if (resp->status != NFS4_OK) {
7159                 return;
7160         }
7161 
7162         /* get the file struct and hold a lock on it during initial open */
7163         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7164         if (fp == NULL) {
7165                 resp->status = NFS4ERR_RESOURCE;
7166                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7167                 return;
7168         }
7169 
7170         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7171         if (sp == NULL) {
7172                 resp->status = NFS4ERR_SERVERFAULT;
7173                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7174                 rw_exit(&fp->rf_file_rwlock);
7175                 rfs4_file_rele(fp);
7176                 return;
7177         }
7178 
             /* State entry is locked before the file entry, as in rfs4_do_open(). */
7179         rfs4_dbe_lock(sp->rs_dbe);
7180         rfs4_dbe_lock(fp->rf_dbe);
7181         if (args->share_access != sp->rs_share_access ||
7182             args->share_deny != sp->rs_share_deny ||
7183             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7184                 NFS4_DEBUG(rfs4_debug,
7185                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7186                 rfs4_dbe_unlock(fp->rf_dbe);
7187                 rfs4_dbe_unlock(sp->rs_dbe);
7188                 rfs4_file_rele(fp);
7189                 rfs4_state_rele(sp);
7190                 resp->status = NFS4ERR_SERVERFAULT;
7191                 return;
7192         }
7193         rfs4_dbe_unlock(fp->rf_dbe);
7194         rfs4_dbe_unlock(sp->rs_dbe);
7195 
7196         dsp = rfs4_finddeleg(sp, &dcreate);
7197         if (dsp == NULL) {
7198                 rfs4_state_rele(sp);
7199                 rfs4_file_rele(fp);
7200                 resp->status = NFS4ERR_SERVERFAULT;
7201                 return;
7202         }
7203 
7204         next_stateid(&sp->rs_stateid);
7205 
7206         resp->stateid = sp->rs_stateid.stateid;
7207 
7208         resp->delegation.delegation_type = dsp->rds_dtype;
7209 
             /* Any type other than OPEN_DELEGATE_READ is reported as a write. */
7210         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7211                 open_read_delegation4 *rv =
7212                     &resp->delegation.open_delegation4_u.read;
7213 
7214                 rv->stateid = dsp->rds_delegid.stateid;
7215                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7216                 ace = &rv->permissions;
7217         } else {
7218                 open_write_delegation4 *rv =
7219                     &resp->delegation.open_delegation4_u.write;
7220 
7221                 rv->stateid = dsp->rds_delegid.stateid;
7222                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7223                 ace = &rv->permissions;
7224                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7225                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7226         }
7227 
7228         /* XXX For now */
7229         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7230         ace->flag = 0;
7231         ace->access_mask = 0;
7232         ace->who.utf8string_len = 0;
7233         ace->who.utf8string_val = 0;
7234 
7235         rfs4_deleg_state_rele(dsp);
7236         rfs4_state_rele(sp);
7237         rfs4_file_rele(fp);
7238 }
7239 
     /*
      * Verdict of a sequence id check, as produced by rfs4_check_seqid():
      *   NFS4_CHKSEQ_OKAY    the request carries the next expected sequence id
      *   NFS4_CHKSEQ_REPLAY  same sequence id and same operation as the reply
      *                       last recorded for the owner
      *   NFS4_CHKSEQ_BAD     any other sequence id, or a repeated sequence id
      *                       paired with a different operation
      */
7240 typedef enum {
7241         NFS4_CHKSEQ_OKAY = 0,
7242         NFS4_CHKSEQ_REPLAY = 1,
7243         NFS4_CHKSEQ_BAD = 2
7244 } rfs4_chkseq_t;
7245 
7246 /*
7247  * Generic function for sequence number checks.
      *
      * seqid and lastop are the sequence id and cached reply last recorded
      * for the owner; rqst_seq and resop come from the request in hand.
      *
      * Returns:
      *   NFS4_CHKSEQ_REPLAY  rqst_seq equals seqid and the operation matches
      *                       lastop.  If copyres is TRUE the current contents
      *                       of resop are freed and replaced by a copy of
      *                       lastop.
      *   NFS4_CHKSEQ_OKAY    rqst_seq is exactly seqid + 1.
      *   NFS4_CHKSEQ_BAD     anything else, including a repeated sequence id
      *                       paired with a different operation.
      *
      * No locking is done here; the rfs4_check_*_seqid() wrappers hold the
      * owning entry's dbe lock around the call.
7248  */
7249 static rfs4_chkseq_t
7250 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7251     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7252 {
7253         /* Same sequence ids and matching operations? */
7254         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7255                 if (copyres == TRUE) {
7256                         rfs4_free_reply(resop);
7257                         rfs4_copy_reply(resop, lastop);
7258                 }
7259                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7260                     "Replayed SEQID %u\n", seqid));
7261                 return (NFS4_CHKSEQ_REPLAY);
7262         }
7263 
7264         /* If the incoming sequence is not the next expected then it is bad */
7265         if (rqst_seq != seqid + 1) {
7266                 if (rqst_seq == seqid) {
7267                         NFS4_DEBUG(rfs4_debug,
7268                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7269                             "but last op was %d current op is %d\n",
7270                             lastop->resop, resop->resop));
7271                         return (NFS4_CHKSEQ_BAD);
7272                 }
                     /* The value being waited for is seqid + 1; report that. */
7273                 NFS4_DEBUG(rfs4_debug,
7274                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7275                     rqst_seq, seqid + 1));
7276                 return (NFS4_CHKSEQ_BAD);
7277         }
7278 
7279         /* Everything okay -- next expected */
7280         return (NFS4_CHKSEQ_OKAY);
7281 }
7282 
7283 
     /*
      * Check a request's sequence id against the open owner's
      * ro_open_seqid and cached ro_reply, holding the owner's dbe lock for
      * the duration of the comparison.  On a replay the cached reply is
      * copied into resop (copyres is passed as TRUE).  rfs4_update_lease()
      * is called for the owner's client only when the verdict is
      * NFS4_CHKSEQ_OKAY.
      */
7284 static rfs4_chkseq_t
7285 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7286 {
7287         rfs4_chkseq_t verdict;
7288 
7289         rfs4_dbe_lock(op->ro_dbe);
7290         verdict = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7291             seqid, resop, TRUE);
7292         rfs4_dbe_unlock(op->ro_dbe);
7293 
             /* Replays and bad sequence ids leave the lease untouched. */
7294         if (verdict != NFS4_CHKSEQ_OKAY)
7295                 return (verdict);
7296         rfs4_update_lease(op->ro_client);
7297         return (NFS4_CHKSEQ_OKAY);
7298 }
7299 
     /*
      * Compare olo_seqid with the open owner's ro_open_seqid and cached
      * ro_reply while holding the owner's dbe lock.  Unlike
      * rfs4_check_open_seqid(), copyres is passed as FALSE, so resop is never
      * overwritten on a replay, and the lease is not updated.
      * NOTE(review): "olo" presumably stands for the open-owner seqid carried
      * by an open-to-lock-owner request -- confirm against the caller.
      */
7300 static rfs4_chkseq_t
7301 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7302 {
7303         rfs4_chkseq_t verdict;
7304 
7305         rfs4_dbe_lock(op->ro_dbe);
7306         verdict = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, olo_seqid,
7307             resop, FALSE);
7308         rfs4_dbe_unlock(op->ro_dbe);
7309 
7310         return (verdict);
7311 }
7312 
     /*
      * Check a request's sequence id against the lock-owner state's rls_seqid
      * and cached rls_reply, holding the state's dbe lock.  When
      * rls_skip_seqid_check is set the comparison is bypassed and
      * NFS4_CHKSEQ_OKAY is returned; otherwise the verdict of
      * rfs4_check_seqid() is returned, with the cached reply copied into
      * resop on a replay (copyres is passed as TRUE).
      */
7313 static rfs4_chkseq_t
7314 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7315 {
7316         rfs4_chkseq_t verdict;
7317 
7318         rfs4_dbe_lock(lsp->rls_dbe);
7319         verdict = lsp->rls_skip_seqid_check ? NFS4_CHKSEQ_OKAY :
7320             rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid, resop,
7321             TRUE);
7322         rfs4_dbe_unlock(lsp->rls_dbe);
7323 
7324         return (verdict);
7325 }
7326 
7327 static void
7328 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7329     struct svc_req *req, struct compound_state *cs)
7330 {
7331         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7332         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7333         open_owner4 *owner = &args->owner;
7334         open_claim_type4 claim = args->claim;
7335         rfs4_client_t *cp;
7336         rfs4_openowner_t *oo;
7337         bool_t create;
7338         bool_t replay = FALSE;
7339         int can_reclaim;
7340 
7341         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7342             OPEN4args *, args);
7343 
7344         if (cs->vp == NULL) {
7345                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7346                 goto end;
7347         }
7348 
7349         /*
7350          * Need to check clientid and lease expiration first based on
7351          * error ordering and incrementing sequence id.
7352          */
7353         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7354         if (cp == NULL) {
7355                 *cs->statusp = resp->status =
7356                     rfs4_check_clientid(&owner->clientid, 0);
7357                 goto end;
7358         }
7359 
7360         if (rfs4_lease_expired(cp)) {
7361                 rfs4_client_close(cp);
7362                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7363                 goto end;
7364         }
7365         can_reclaim = cp->rc_can_reclaim;
7366 
7367         /*
7368          * Find the open_owner for use from this point forward.  Take
7369          * care in updating the sequence id based on the type of error
7370          * being returned.
7371          */
7372 retry:
7373         create = TRUE;
7374         oo = rfs4_findopenowner(owner, &create, args->seqid);
7375         if (oo == NULL) {
7376                 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7377                 rfs4_client_rele(cp);
7378                 goto end;
7379         }
7380 
7381         /* Hold off access to the sequence space while the open is done */
7382         rfs4_sw_enter(&oo->ro_sw);
7383 
7384         /*
7385          * If the open_owner existed before at the server, then check
7386          * the sequence id.
7387          */
7388         if (!create && !oo->ro_postpone_confirm) {
7389                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7390                 case NFS4_CHKSEQ_BAD:
7391                         if ((args->seqid > oo->ro_open_seqid) &&
7392                             oo->ro_need_confirm) {
7393                                 rfs4_free_opens(oo, TRUE, FALSE);
7394                                 rfs4_sw_exit(&oo->ro_sw);
7395                                 rfs4_openowner_rele(oo);
7396                                 goto retry;
7397                         }
7398                         resp->status = NFS4ERR_BAD_SEQID;
7399                         goto out;
7400                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7401                         replay = TRUE;
7402                         goto out;
7403                 default:
7404                         break;
7405                 }
7406 
7407                 /*
7408                  * Sequence was ok and open owner exists
7409                  * check to see if we have yet to see an
7410                  * open_confirm.
7411                  */
7412                 if (oo->ro_need_confirm) {
7413                         rfs4_free_opens(oo, TRUE, FALSE);
7414                         rfs4_sw_exit(&oo->ro_sw);
7415                         rfs4_openowner_rele(oo);
7416                         goto retry;
7417                 }
7418         }
7419         /* Grace only applies to regular-type OPENs */
7420         if (rfs4_clnt_in_grace(cp) &&
7421             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7422                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7423                 goto out;
7424         }
7425 
7426         /*
7427          * If previous state at the server existed then can_reclaim
7428          * will be set. If not reply NFS4ERR_NO_GRACE to the
7429          * client.
7430          */
7431         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7432                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7433                 goto out;
7434         }
7435 
7436 
7437         /*
7438          * Reject the open if the client has missed the grace period
7439          */
7440         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7441                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7442                 goto out;
7443         }
7444 
7445         /* Couple of up-front bookkeeping items */
7446         if (oo->ro_need_confirm) {
7447                 /*
7448                  * If this is a reclaim OPEN then we should not ask
7449                  * for a confirmation of the open_owner per the
7450                  * protocol specification.
7451                  */
7452                 if (claim == CLAIM_PREVIOUS)
7453                         oo->ro_need_confirm = FALSE;
7454                 else
7455                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7456         }
7457         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7458 
7459         /*
7460          * If there is an unshared filesystem mounted on this vnode,
7461          * do not allow to open/create in this directory.
7462          */
7463         if (vn_ismntpt(cs->vp)) {
7464                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7465                 goto out;
7466         }
7467 
7468         /*
7469          * access must READ, WRITE, or BOTH.  No access is invalid.
7470          * deny can be READ, WRITE, BOTH, or NONE.
7471          * bits not defined for access/deny are invalid.
7472          */
7473         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7474             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7475             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7476                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7477                 goto out;
7478         }
7479 
7480 
7481         /*
7482          * make sure attrset is zero before response is built.
7483          */
7484         resp->attrset = 0;
7485 
7486         switch (claim) {
7487         case CLAIM_NULL:
7488                 rfs4_do_opennull(cs, req, args, oo, resp);
7489                 break;
7490         case CLAIM_PREVIOUS:
7491                 rfs4_do_openprev(cs, req, args, oo, resp);
7492                 break;
7493         case CLAIM_DELEGATE_CUR:
7494                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7495                 break;
7496         case CLAIM_DELEGATE_PREV:
7497                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7498                 break;
7499         default:
7500                 resp->status = NFS4ERR_INVAL;
7501                 break;
7502         }
7503 
7504 out:
7505         rfs4_client_rele(cp);
7506 
7507         /* Catch sequence id handling here to make it a little easier */
7508         switch (resp->status) {
7509         case NFS4ERR_BADXDR:
7510         case NFS4ERR_BAD_SEQID:
7511         case NFS4ERR_BAD_STATEID:
7512         case NFS4ERR_NOFILEHANDLE:
7513         case NFS4ERR_RESOURCE:
7514         case NFS4ERR_STALE_CLIENTID:
7515         case NFS4ERR_STALE_STATEID:
7516                 /*
7517                  * The protocol states that if any of these errors are
7518                  * being returned, the sequence id should not be
7519                  * incremented.  Any other return requires an
7520                  * increment.
7521                  */
7522                 break;
7523         default:
7524                 /* Always update the lease in this case */
7525                 rfs4_update_lease(oo->ro_client);
7526 
7527                 /* Regular response - copy the result */
7528                 if (!replay)
7529                         rfs4_update_open_resp(oo, resop, &cs->fh);
7530 
7531                 /*
7532                  * REPLAY case: Only if the previous response was OK
7533                  * do we copy the filehandle.  If not OK, no
7534                  * filehandle to copy.
7535                  */
7536                 if (replay == TRUE &&
7537                     resp->status == NFS4_OK &&
7538                     oo->ro_reply_fh.nfs_fh4_val) {
7539                         /*
7540                          * If this is a replay, we must restore the
7541                          * current filehandle/vp to that of what was
7542                          * returned originally.  Try our best to do
7543                          * it.
7544                          */
7545                         nfs_fh4_fmt_t *fh_fmtp =
7546                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7547 
7548                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7549                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7550 
7551                         if (cs->exi == NULL) {
7552                                 resp->status = NFS4ERR_STALE;
7553                                 goto finish;
7554                         }
7555 
7556                         VN_RELE(cs->vp);
7557 
7558                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7559                             &resp->status);
7560 
7561                         if (cs->vp == NULL)
7562                                 goto finish;
7563 
7564                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7565                 }
7566 
7567                 /*
7568                  * If this was a replay, no need to update the
7569                  * sequence id. If the open_owner was not created on
7570                  * this pass, then update.  The first use of an
7571                  * open_owner will not bump the sequence id.
7572                  */
7573                 if (replay == FALSE && !create)
7574                         rfs4_update_open_sequence(oo);
7575                 /*
7576                  * If the client is receiving an error and the
7577                  * open_owner needs to be confirmed, there is no way
7578                  * to notify the client of this fact ignoring the fact
7579                  * that the server has no method of returning a
7580                  * stateid to confirm.  Therefore, the server needs to
7581                  * mark this open_owner in a way as to avoid the
7582                  * sequence id checking the next time the client uses
7583                  * this open_owner.
7584                  */
7585                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7586                         oo->ro_postpone_confirm = TRUE;
7587                 /*
7588                  * If OK response then clear the postpone flag and
7589                  * reset the sequence id to keep in sync with the
7590                  * client.
7591                  */
7592                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7593                         oo->ro_postpone_confirm = FALSE;
7594                         oo->ro_open_seqid = args->seqid;
7595                 }
7596                 break;
7597         }
7598 
7599 finish:
7600         *cs->statusp = resp->status;
7601 
7602         rfs4_sw_exit(&oo->ro_sw);
7603         rfs4_openowner_rele(oo);
7604 
7605 end:
7606         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7607             OPEN4res *, resp);
7608 }
7609 
7610 /*ARGSUSED*/
7611 void
7612 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7613     struct svc_req *req, struct compound_state *cs)
7614 {
7615         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7616         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7617         rfs4_state_t *sp;
7618         nfsstat4 status;
7619 
7620         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7621             OPEN_CONFIRM4args *, args);
7622 
7623         if (cs->vp == NULL) {
7624                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7625                 goto out;
7626         }
7627 
7628         if (cs->vp->v_type != VREG) {
7629                 *cs->statusp = resp->status =
7630                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7631                 return;
7632         }
7633 
7634         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7635         if (status != NFS4_OK) {
7636                 *cs->statusp = resp->status = status;
7637                 goto out;
7638         }
7639 
7640         /* Ensure specified filehandle matches */
7641         if (cs->vp != sp->rs_finfo->rf_vp) {
7642                 rfs4_state_rele(sp);
7643                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7644                 goto out;
7645         }
7646 
7647         /* hold off other access to open_owner while we tinker */
7648         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7649 
7650         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7651         case NFS4_CHECK_STATEID_OKAY:
7652                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7653                     resop) != 0) {
7654                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7655                         break;
7656                 }
7657                 /*
7658                  * If it is the appropriate stateid and determined to
7659                  * be "OKAY" then this means that the stateid does not
7660                  * need to be confirmed and the client is in error for
7661                  * sending an OPEN_CONFIRM.
7662                  */
7663                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7664                 break;
7665         case NFS4_CHECK_STATEID_OLD:
7666                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7667                 break;
7668         case NFS4_CHECK_STATEID_BAD:
7669                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7670                 break;
7671         case NFS4_CHECK_STATEID_EXPIRED:
7672                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7673                 break;
7674         case NFS4_CHECK_STATEID_CLOSED:
7675                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7676                 break;
7677         case NFS4_CHECK_STATEID_REPLAY:
7678                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7679                     resop)) {
7680                 case NFS4_CHKSEQ_OKAY:
7681                         /*
7682                          * This is replayed stateid; if seqid matches
7683                          * next expected, then client is using wrong seqid.
7684                          */
7685                         /* fall through */
7686                 case NFS4_CHKSEQ_BAD:
7687                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7688                         break;
7689                 case NFS4_CHKSEQ_REPLAY:
7690                         /*
7691                          * Note this case is the duplicate case so
7692                          * resp->status is already set.
7693                          */
7694                         *cs->statusp = resp->status;
7695                         rfs4_update_lease(sp->rs_owner->ro_client);
7696                         break;
7697                 }
7698                 break;
7699         case NFS4_CHECK_STATEID_UNCONFIRMED:
7700                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7701                     resop) != NFS4_CHKSEQ_OKAY) {
7702                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7703                         break;
7704                 }
7705                 *cs->statusp = resp->status = NFS4_OK;
7706 
7707                 next_stateid(&sp->rs_stateid);
7708                 resp->open_stateid = sp->rs_stateid.stateid;
7709                 sp->rs_owner->ro_need_confirm = FALSE;
7710                 rfs4_update_lease(sp->rs_owner->ro_client);
7711                 rfs4_update_open_sequence(sp->rs_owner);
7712                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7713                 break;
7714         default:
7715                 ASSERT(FALSE);
7716                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7717                 break;
7718         }
7719         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7720         rfs4_state_rele(sp);
7721 
7722 out:
7723         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7724             OPEN_CONFIRM4res *, resp);
7725 }
7726 
7727 /*ARGSUSED*/
7728 void
7729 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7730     struct svc_req *req, struct compound_state *cs)
7731 {
7732         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7733         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7734         uint32_t access = args->share_access;
7735         uint32_t deny = args->share_deny;
7736         nfsstat4 status;
7737         rfs4_state_t *sp;
7738         rfs4_file_t *fp;
7739         int fflags = 0;
7740 
7741         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7742             OPEN_DOWNGRADE4args *, args);
7743 
7744         if (cs->vp == NULL) {
7745                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7746                 goto out;
7747         }
7748 
7749         if (cs->vp->v_type != VREG) {
7750                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7751                 return;
7752         }
7753 
7754         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7755         if (status != NFS4_OK) {
7756                 *cs->statusp = resp->status = status;
7757                 goto out;
7758         }
7759 
7760         /* Ensure specified filehandle matches */
7761         if (cs->vp != sp->rs_finfo->rf_vp) {
7762                 rfs4_state_rele(sp);
7763                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7764                 goto out;
7765         }
7766 
7767         /* hold off other access to open_owner while we tinker */
7768         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7769 
7770         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7771         case NFS4_CHECK_STATEID_OKAY:
7772                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7773                     resop) != NFS4_CHKSEQ_OKAY) {
7774                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7775                         goto end;
7776                 }
7777                 break;
7778         case NFS4_CHECK_STATEID_OLD:
7779                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7780                 goto end;
7781         case NFS4_CHECK_STATEID_BAD:
7782                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7783                 goto end;
7784         case NFS4_CHECK_STATEID_EXPIRED:
7785                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7786                 goto end;
7787         case NFS4_CHECK_STATEID_CLOSED:
7788                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7789                 goto end;
7790         case NFS4_CHECK_STATEID_UNCONFIRMED:
7791                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7792                 goto end;
7793         case NFS4_CHECK_STATEID_REPLAY:
7794                 /* Check the sequence id for the open owner */
7795                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7796                     resop)) {
7797                 case NFS4_CHKSEQ_OKAY:
7798                         /*
7799                          * This is replayed stateid; if seqid matches
7800                          * next expected, then client is using wrong seqid.
7801                          */
7802                         /* fall through */
7803                 case NFS4_CHKSEQ_BAD:
7804                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7805                         goto end;
7806                 case NFS4_CHKSEQ_REPLAY:
7807                         /*
7808                          * Note this case is the duplicate case so
7809                          * resp->status is already set.
7810                          */
7811                         *cs->statusp = resp->status;
7812                         rfs4_update_lease(sp->rs_owner->ro_client);
7813                         goto end;
7814                 }
7815                 break;
7816         default:
7817                 ASSERT(FALSE);
7818                 break;
7819         }
7820 
7821         rfs4_dbe_lock(sp->rs_dbe);
7822         /*
7823          * Check that the new access modes and deny modes are valid.
7824          * Check that no invalid bits are set.
7825          */
7826         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7827             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7828                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7829                 rfs4_update_open_sequence(sp->rs_owner);
7830                 rfs4_dbe_unlock(sp->rs_dbe);
7831                 goto end;
7832         }
7833 
7834         /*
7835          * The new modes must be a subset of the current modes and
7836          * the access must specify at least one mode. To test that
7837          * the new mode is a subset of the current modes we bitwise
7838          * AND them together and check that the result equals the new
7839          * mode. For example:
7840          * New mode, access == R and current mode, sp->rs_open_access  == RW
7841          * access & sp->rs_open_access == R == access, so the new access mode
7842          * is valid. Consider access == RW, sp->rs_open_access = R
7843          * access & sp->rs_open_access == R != access, so the new access mode
7844          * is invalid.
7845          */
7846         if ((access & sp->rs_open_access) != access ||
7847             (deny & sp->rs_open_deny) != deny ||
7848             (access &
7849             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7850                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7851                 rfs4_update_open_sequence(sp->rs_owner);
7852                 rfs4_dbe_unlock(sp->rs_dbe);
7853                 goto end;
7854         }
7855 
7856         /*
7857          * Release any share locks associated with this stateID.
7858          * Strictly speaking, this violates the spec because the
7859          * spec effectively requires that open downgrade be atomic.
7860          * At present, fs_shrlock does not have this capability.
7861          */
7862         (void) rfs4_unshare(sp);
7863 
7864         status = rfs4_share(sp, access, deny);
7865         if (status != NFS4_OK) {
7866                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7867                 rfs4_update_open_sequence(sp->rs_owner);
7868                 rfs4_dbe_unlock(sp->rs_dbe);
7869                 goto end;
7870         }
7871 
7872         fp = sp->rs_finfo;
7873         rfs4_dbe_lock(fp->rf_dbe);
7874 
7875         /*
7876          * If the current mode has deny read and the new mode
7877          * does not, decrement the number of deny read mode bits
7878          * and if it goes to zero turn off the deny read bit
7879          * on the file.
7880          */
7881         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7882             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7883                 fp->rf_deny_read--;
7884                 if (fp->rf_deny_read == 0)
7885                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7886         }
7887 
7888         /*
7889          * If the current mode has deny write and the new mode
7890          * does not, decrement the number of deny write mode bits
7891          * and if it goes to zero turn off the deny write bit
7892          * on the file.
7893          */
7894         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7895             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7896                 fp->rf_deny_write--;
7897                 if (fp->rf_deny_write == 0)
7898                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7899         }
7900 
7901         /*
7902          * If the current mode has access read and the new mode
7903          * does not, decrement the number of access read mode bits
7904          * and if it goes to zero turn off the access read bit
7905          * on the file.  set fflags to FREAD for the call to
7906          * vn_open_downgrade().
7907          */
7908         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7909             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7910                 fp->rf_access_read--;
7911                 if (fp->rf_access_read == 0)
7912                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7913                 fflags |= FREAD;
7914         }
7915 
7916         /*
7917          * If the current mode has access write and the new mode
7918          * does not, decrement the number of access write mode bits
7919          * and if it goes to zero turn off the access write bit
7920          * on the file.  set fflags to FWRITE for the call to
7921          * vn_open_downgrade().
7922          */
7923         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7924             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7925                 fp->rf_access_write--;
7926                 if (fp->rf_access_write == 0)
7927                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7928                 fflags |= FWRITE;
7929         }
7930 
7931         /* Check that the file is still accessible */
7932         ASSERT(fp->rf_share_access);
7933 
7934         rfs4_dbe_unlock(fp->rf_dbe);
7935 
7936         /* now set the new open access and deny modes */
7937         sp->rs_open_access = access;
7938         sp->rs_open_deny = deny;
7939 
7940         /*
7941          * we successfully downgraded the share lock, now we need to downgrade
7942          * the open. it is possible that the downgrade was only for a deny
7943          * mode and we have nothing else to do.
7944          */
7945         if ((fflags & (FREAD|FWRITE)) != 0)
7946                 vn_open_downgrade(cs->vp, fflags);
7947 
7948         /* Update the stateid */
7949         next_stateid(&sp->rs_stateid);
7950         resp->open_stateid = sp->rs_stateid.stateid;
7951 
7952         rfs4_dbe_unlock(sp->rs_dbe);
7953 
7954         *cs->statusp = resp->status = NFS4_OK;
7955         /* Update the lease */
7956         rfs4_update_lease(sp->rs_owner->ro_client);
7957         /* And the sequence */
7958         rfs4_update_open_sequence(sp->rs_owner);
7959         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7960 
7961 end:
7962         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7963         rfs4_state_rele(sp);
7964 out:
7965         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7966             OPEN_DOWNGRADE4res *, resp);
7967 }
7968 
/*
 * memstr - find a NUL-terminated string inside a counted byte buffer.
 *
 * s1 is the buffer to search and n its length in bytes; it does not have
 * to be NUL-terminated.  s2 is the string to look for.  Returns a pointer
 * to the first position in s1 where the bytes of s2 occur, or NULL when
 * fewer than strlen(s2) bytes remain without a match.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t needle_len = strlen(s2);
        char *cursor;
        size_t remaining;

        /* slide a needle-sized window along the buffer, one byte a step */
        for (cursor = (char *)s1, remaining = n; remaining >= needle_len;
            cursor++, remaining--) {
                if (bcmp(cursor, s2, needle_len) == 0)
                        return (cursor);
        }

        return (NULL);
}
7984 
7985 /*
7986  * The logic behind this function is detailed in the NFSv4 RFC in the
7987  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7988  * that section for explicit guidance to server behavior for
7989  * SETCLIENTID.
7990  */
7991 void
7992 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7993     struct svc_req *req, struct compound_state *cs)
7994 {
7995         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7996         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7997         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7998         rfs4_clntip_t *ci;
7999         bool_t create;
8000         char *addr, *netid;
8001         int len;
8002 
8003         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8004             SETCLIENTID4args *, args);
8005 retry:
8006         newcp = cp_confirmed = cp_unconfirmed = NULL;
8007 
8008         /*
8009          * Save the caller's IP address
8010          */
8011         args->client.cl_addr =
8012             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8013 
8014         /*
8015          * Record if it is a Solaris client that cannot handle referrals.
8016          */
8017         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8018             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8019                 /* Add a "yes, it's downrev" record */
8020                 create = TRUE;
8021                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8022                 ASSERT(ci != NULL);
8023                 rfs4_dbe_rele(ci->ri_dbe);
8024         } else {
8025                 /* Remove any previous record */
8026                 rfs4_invalidate_clntip(args->client.cl_addr);
8027         }
8028 
8029         /*
8030          * In search of an EXISTING client matching the incoming
8031          * request to establish a new client identifier at the server
8032          */
8033         create = TRUE;
8034         cp = rfs4_findclient(&args->client, &create, NULL);
8035 
8036         /* Should never happen */
8037         ASSERT(cp != NULL);
8038 
8039         if (cp == NULL) {
8040                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8041                 goto out;
8042         }
8043 
8044         /*
8045          * Easiest case. Client identifier is newly created and is
8046          * unconfirmed.  Also note that for this case, no other
8047          * entries exist for the client identifier.  Nothing else to
8048          * check.  Just setup the response and respond.
8049          */
8050         if (create) {
8051                 *cs->statusp = res->status = NFS4_OK;
8052                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8053                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8054                     cp->rc_confirm_verf;
8055                 /* Setup callback information; CB_NULL confirmation later */
8056                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8057 
8058                 rfs4_client_rele(cp);
8059                 goto out;
8060         }
8061 
8062         /*
8063          * An existing, confirmed client may exist but it may not have
8064          * been active for at least one lease period.  If so, then
8065          * "close" the client and create a new client identifier
8066          */
8067         if (rfs4_lease_expired(cp)) {
8068                 rfs4_client_close(cp);
8069                 goto retry;
8070         }
8071 
8072         if (cp->rc_need_confirm == TRUE)
8073                 cp_unconfirmed = cp;
8074         else
8075                 cp_confirmed = cp;
8076 
8077         cp = NULL;
8078 
8079         /*
8080          * We have a confirmed client, now check for an
8081          * unconfimred entry
8082          */
8083         if (cp_confirmed) {
8084                 /* If creds don't match then client identifier is inuse */
8085                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8086                         rfs4_cbinfo_t *cbp;
8087                         /*
8088                          * Some one else has established this client
8089                          * id. Try and say * who they are. We will use
8090                          * the call back address supplied by * the
8091                          * first client.
8092                          */
8093                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8094 
8095                         addr = netid = NULL;
8096 
8097                         cbp = &cp_confirmed->rc_cbinfo;
8098                         if (cbp->cb_callback.cb_location.r_addr &&
8099                             cbp->cb_callback.cb_location.r_netid) {
8100                                 cb_client4 *cbcp = &cbp->cb_callback;
8101 
8102                                 len = strlen(cbcp->cb_location.r_addr)+1;
8103                                 addr = kmem_alloc(len, KM_SLEEP);
8104                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8105                                 len = strlen(cbcp->cb_location.r_netid)+1;
8106                                 netid = kmem_alloc(len, KM_SLEEP);
8107                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8108                         }
8109 
8110                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8111                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8112 
8113                         rfs4_client_rele(cp_confirmed);
8114                 }
8115 
8116                 /*
8117                  * Confirmed, creds match, and verifier matches; must
8118                  * be an update of the callback info
8119                  */
8120                 if (cp_confirmed->rc_nfs_client.verifier ==
8121                     args->client.verifier) {
8122                         /* Setup callback information */
8123                         rfs4_client_setcb(cp_confirmed, &args->callback,
8124                             args->callback_ident);
8125 
8126                         /* everything okay -- move ahead */
8127                         *cs->statusp = res->status = NFS4_OK;
8128                         res->SETCLIENTID4res_u.resok4.clientid =
8129                             cp_confirmed->rc_clientid;
8130 
8131                         /* update the confirm_verifier and return it */
8132                         rfs4_client_scv_next(cp_confirmed);
8133                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8134                             cp_confirmed->rc_confirm_verf;
8135 
8136                         rfs4_client_rele(cp_confirmed);
8137                         goto out;
8138                 }
8139 
8140                 /*
8141                  * Creds match but the verifier doesn't.  Must search
8142                  * for an unconfirmed client that would be replaced by
8143                  * this request.
8144                  */
8145                 create = FALSE;
8146                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8147                     cp_confirmed);
8148         }
8149 
8150         /*
8151          * At this point, we have taken care of the brand new client
8152          * struct, INUSE case, update of an existing, and confirmed
8153          * client struct.
8154          */
8155 
8156         /*
8157          * check to see if things have changed while we originally
8158          * picked up the client struct.  If they have, then return and
8159          * retry the processing of this SETCLIENTID request.
8160          */
8161         if (cp_unconfirmed) {
8162                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8163                 if (!cp_unconfirmed->rc_need_confirm) {
8164                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8165                         rfs4_client_rele(cp_unconfirmed);
8166                         if (cp_confirmed)
8167                                 rfs4_client_rele(cp_confirmed);
8168                         goto retry;
8169                 }
8170                 /* do away with the old unconfirmed one */
8171                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8172                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8173                 rfs4_client_rele(cp_unconfirmed);
8174                 cp_unconfirmed = NULL;
8175         }
8176 
8177         /*
8178          * This search will temporarily hide the confirmed client
8179          * struct while a new client struct is created as the
8180          * unconfirmed one.
8181          */
8182         create = TRUE;
8183         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8184 
8185         ASSERT(newcp != NULL);
8186 
8187         if (newcp == NULL) {
8188                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8189                 rfs4_client_rele(cp_confirmed);
8190                 goto out;
8191         }
8192 
8193         /*
8194          * If one was not created, then a similar request must be in
8195          * process so release and start over with this one
8196          */
8197         if (create != TRUE) {
8198                 rfs4_client_rele(newcp);
8199                 if (cp_confirmed)
8200                         rfs4_client_rele(cp_confirmed);
8201                 goto retry;
8202         }
8203 
8204         *cs->statusp = res->status = NFS4_OK;
8205         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8206         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8207             newcp->rc_confirm_verf;
8208         /* Setup callback information; CB_NULL confirmation later */
8209         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8210 
8211         newcp->rc_cp_confirmed = cp_confirmed;
8212 
8213         rfs4_client_rele(newcp);
8214 
8215 out:
8216         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8217             SETCLIENTID4res *, res);
8218 }
8219 
8220 /*ARGSUSED*/
8221 void
8222 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8223     struct svc_req *req, struct compound_state *cs)
8224 {
8225         SETCLIENTID_CONFIRM4args *args =
8226             &argop->nfs_argop4_u.opsetclientid_confirm;
8227         SETCLIENTID_CONFIRM4res *res =
8228             &resop->nfs_resop4_u.opsetclientid_confirm;
8229         rfs4_client_t *cp, *cptoclose = NULL;
8230         nfs4_srv_t *nsrv4;
8231 
8232         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8233             struct compound_state *, cs,
8234             SETCLIENTID_CONFIRM4args *, args);
8235 
8236         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8237         *cs->statusp = res->status = NFS4_OK;
8238 
8239         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8240 
8241         if (cp == NULL) {
8242                 *cs->statusp = res->status =
8243                     rfs4_check_clientid(&args->clientid, 1);
8244                 goto out;
8245         }
8246 
8247         if (!creds_ok(cp, req, cs)) {
8248                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8249                 rfs4_client_rele(cp);
8250                 goto out;
8251         }
8252 
8253         /* If the verifier doesn't match, the record doesn't match */
8254         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8255                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8256                 rfs4_client_rele(cp);
8257                 goto out;
8258         }
8259 
8260         rfs4_dbe_lock(cp->rc_dbe);
8261         cp->rc_need_confirm = FALSE;
8262         if (cp->rc_cp_confirmed) {
8263                 cptoclose = cp->rc_cp_confirmed;
8264                 cptoclose->rc_ss_remove = 1;
8265                 cp->rc_cp_confirmed = NULL;
8266         }
8267 
8268         /*
8269          * Update the client's associated server instance, if it's changed
8270          * since the client was created.
8271          */
8272         if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8273                 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8274 
8275         /*
8276          * Record clientid in stable storage.
8277          * Must be done after server instance has been assigned.
8278          */
8279         rfs4_ss_clid(nsrv4, cp);
8280 
8281         rfs4_dbe_unlock(cp->rc_dbe);
8282 
8283         if (cptoclose)
8284                 /* don't need to rele, client_close does it */
8285                 rfs4_client_close(cptoclose);
8286 
8287         /* If needed, initiate CB_NULL call for callback path */
8288         rfs4_deleg_cb_check(cp);
8289         rfs4_update_lease(cp);
8290 
8291         /*
8292          * Check to see if client can perform reclaims
8293          */
8294         rfs4_ss_chkclid(nsrv4, cp);
8295 
8296         rfs4_client_rele(cp);
8297 
8298 out:
8299         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8300             struct compound_state *, cs,
8301             SETCLIENTID_CONFIRM4 *, res);
8302 }
8303 
8304 
8305 /*ARGSUSED*/
8306 void
8307 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8308     struct svc_req *req, struct compound_state *cs)
8309 {
8310         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8311         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8312         rfs4_state_t *sp;
8313         nfsstat4 status;
8314 
8315         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8316             CLOSE4args *, args);
8317 
8318         if (cs->vp == NULL) {
8319                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8320                 goto out;
8321         }
8322 
8323         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8324         if (status != NFS4_OK) {
8325                 *cs->statusp = resp->status = status;
8326                 goto out;
8327         }
8328 
8329         /* Ensure specified filehandle matches */
8330         if (cs->vp != sp->rs_finfo->rf_vp) {
8331                 rfs4_state_rele(sp);
8332                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8333                 goto out;
8334         }
8335 
8336         /* hold off other access to open_owner while we tinker */
8337         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8338 
8339         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8340         case NFS4_CHECK_STATEID_OKAY:
8341                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8342                     resop) != NFS4_CHKSEQ_OKAY) {
8343                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8344                         goto end;
8345                 }
8346                 break;
8347         case NFS4_CHECK_STATEID_OLD:
8348                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8349                 goto end;
8350         case NFS4_CHECK_STATEID_BAD:
8351                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8352                 goto end;
8353         case NFS4_CHECK_STATEID_EXPIRED:
8354                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8355                 goto end;
8356         case NFS4_CHECK_STATEID_CLOSED:
8357                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8358                 goto end;
8359         case NFS4_CHECK_STATEID_UNCONFIRMED:
8360                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8361                 goto end;
8362         case NFS4_CHECK_STATEID_REPLAY:
8363                 /* Check the sequence id for the open owner */
8364                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8365                     resop)) {
8366                 case NFS4_CHKSEQ_OKAY:
8367                         /*
8368                          * This is replayed stateid; if seqid matches
8369                          * next expected, then client is using wrong seqid.
8370                          */
8371                         /* FALL THROUGH */
8372                 case NFS4_CHKSEQ_BAD:
8373                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8374                         goto end;
8375                 case NFS4_CHKSEQ_REPLAY:
8376                         /*
8377                          * Note this case is the duplicate case so
8378                          * resp->status is already set.
8379                          */
8380                         *cs->statusp = resp->status;
8381                         rfs4_update_lease(sp->rs_owner->ro_client);
8382                         goto end;
8383                 }
8384                 break;
8385         default:
8386                 ASSERT(FALSE);
8387                 break;
8388         }
8389 
8390         rfs4_dbe_lock(sp->rs_dbe);
8391 
8392         /* Update the stateid. */
8393         next_stateid(&sp->rs_stateid);
8394         resp->open_stateid = sp->rs_stateid.stateid;
8395 
8396         rfs4_dbe_unlock(sp->rs_dbe);
8397 
8398         rfs4_update_lease(sp->rs_owner->ro_client);
8399         rfs4_update_open_sequence(sp->rs_owner);
8400         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8401 
8402         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8403 
8404         *cs->statusp = resp->status = status;
8405 
8406 end:
8407         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8408         rfs4_state_rele(sp);
8409 out:
8410         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8411             CLOSE4res *, resp);
8412 }
8413 
8414 /*
8415  * Manage the counts on the file struct and close all file locks
8416  */
8417 /*ARGSUSED*/
8418 void
8419 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8420     bool_t close_of_client)
8421 {
8422         rfs4_file_t *fp = sp->rs_finfo;
8423         rfs4_lo_state_t *lsp;
8424         int fflags = 0;
8425 
8426         /*
8427          * If this call is part of the larger closing down of client
8428          * state then it is just easier to release all locks
8429          * associated with this client instead of going through each
8430          * individual file and cleaning locks there.
8431          */
8432         if (close_of_client) {
8433                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8434                     !list_is_empty(&sp->rs_lostatelist) &&
8435                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8436                         /* Is the PxFS kernel module loaded? */
8437                         if (lm_remove_file_locks != NULL) {
8438                                 int new_sysid;
8439 
8440                                 /* Encode the cluster nodeid in new sysid */
8441                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8442                                 lm_set_nlmid_flk(&new_sysid);
8443 
8444                                 /*
8445                                  * This PxFS routine removes file locks for a
8446                                  * client over all nodes of a cluster.
8447                                  */
8448                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8449                                     "lm_remove_file_locks(sysid=0x%x)\n",
8450                                     new_sysid));
8451                                 (*lm_remove_file_locks)(new_sysid);
8452                         } else {
8453                                 struct flock64 flk;
8454 
8455                                 /* Release all locks for this client */
8456                                 flk.l_type = F_UNLKSYS;
8457                                 flk.l_whence = 0;
8458                                 flk.l_start = 0;
8459                                 flk.l_len = 0;
8460                                 flk.l_sysid =
8461                                     sp->rs_owner->ro_client->rc_sysidt;
8462                                 flk.l_pid = 0;
8463                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8464                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8465                                     (u_offset_t)0, NULL, CRED(), NULL);
8466                         }
8467 
8468                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8469                 }
8470         }
8471 
8472         /*
8473          * Release all locks on this file by this lock owner or at
8474          * least mark the locks as having been released
8475          */
8476         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8477             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8478                 lsp->rls_locks_cleaned = TRUE;
8479 
8480                 /* Was this already taken care of above? */
8481                 if (!close_of_client &&
8482                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8483                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8484                             lsp->rls_locker->rl_pid,
8485                             lsp->rls_locker->rl_client->rc_sysidt);
8486         }
8487 
8488         /*
8489          * Release any shrlocks associated with this open state ID.
8490          * This must be done before the rfs4_state gets marked closed.
8491          */
8492         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8493                 (void) rfs4_unshare(sp);
8494 
8495         if (sp->rs_open_access) {
8496                 rfs4_dbe_lock(fp->rf_dbe);
8497 
8498                 /*
8499                  * Decrement the count for each access and deny bit that this
8500                  * state has contributed to the file.
8501                  * If the file counts go to zero
8502                  * clear the appropriate bit in the appropriate mask.
8503                  */
8504                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8505                         fp->rf_access_read--;
8506                         fflags |= FREAD;
8507                         if (fp->rf_access_read == 0)
8508                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8509                 }
8510                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8511                         fp->rf_access_write--;
8512                         fflags |= FWRITE;
8513                         if (fp->rf_access_write == 0)
8514                                 fp->rf_share_access &=
8515                                     ~OPEN4_SHARE_ACCESS_WRITE;
8516                 }
8517                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8518                         fp->rf_deny_read--;
8519                         if (fp->rf_deny_read == 0)
8520                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8521                 }
8522                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8523                         fp->rf_deny_write--;
8524                         if (fp->rf_deny_write == 0)
8525                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8526                 }
8527 
8528                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8529 
8530                 rfs4_dbe_unlock(fp->rf_dbe);
8531 
8532                 sp->rs_open_access = 0;
8533                 sp->rs_open_deny = 0;
8534         }
8535 }
8536 
8537 /*
8538  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8539  */
8540 static nfsstat4
8541 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8542 {
8543         rfs4_lockowner_t *lo;
8544         rfs4_client_t *cp;
8545         uint32_t len;
8546 
8547         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8548         if (lo != NULL) {
8549                 cp = lo->rl_client;
8550                 if (rfs4_lease_expired(cp)) {
8551                         rfs4_lockowner_rele(lo);
8552                         rfs4_dbe_hold(cp->rc_dbe);
8553                         rfs4_client_close(cp);
8554                         return (NFS4ERR_EXPIRED);
8555                 }
8556                 dp->owner.clientid = lo->rl_owner.clientid;
8557                 len = lo->rl_owner.owner_len;
8558                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8559                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8560                 dp->owner.owner_len = len;
8561                 rfs4_lockowner_rele(lo);
8562                 goto finish;
8563         }
8564 
8565         /*
8566          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8567          * of the client id contain the boot time for a NFS4 lock. So we
8568          * fabricate and identity by setting clientid to the sysid, and
8569          * the lock owner to the pid.
8570          */
8571         dp->owner.clientid = flk->l_sysid;
8572         len = sizeof (pid_t);
8573         dp->owner.owner_len = len;
8574         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8575         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8576 finish:
8577         dp->offset = flk->l_start;
8578         dp->length = flk->l_len;
8579 
8580         if (flk->l_type == F_RDLCK)
8581                 dp->locktype = READ_LT;
8582         else if (flk->l_type == F_WRLCK)
8583                 dp->locktype = WRITE_LT;
8584         else
8585                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8586 
8587         return (NFS4_OK);
8588 }
8589 
8590 /*
8591  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8592  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8593  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8594  * for that (obviously); they are sending the LOCK requests with some delays
8595  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8596  * locking and delay implementation at the client side.
8597  *
8598  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8599  * fast retries on its own (the for loop below) in a hope the lock will be
8600  * available soon.  And if not, the client won't need to resend the LOCK
8601  * requests so fast to check the lock availability.  This basically saves some
8602  * network traffic and tries to make sure the client gets the lock ASAP.
8603  */
8604 static int
8605 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8606 {
8607         int error;
8608         struct flock64 flk;
8609         int i;
8610         clock_t delaytime;
8611         int cmd;
8612         int spin_cnt = 0;
8613 
8614         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8615 retry:
8616         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8617 
8618         for (i = 0; i < rfs4_maxlock_tries; i++) {
8619                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8620                 error = VOP_FRLOCK(vp, cmd,
8621                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8622 
8623                 if (error != EAGAIN && error != EACCES)
8624                         break;
8625 
8626                 if (i < rfs4_maxlock_tries - 1) {
8627                         delay(delaytime);
8628                         delaytime *= 2;
8629                 }
8630         }
8631 
8632         if (error == EAGAIN || error == EACCES) {
8633                 /* Get the owner of the lock */
8634                 flk = *flock;
8635                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8636                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8637                     NULL) == 0) {
8638                         /*
8639                          * There's a race inherent in the current VOP_FRLOCK
8640                          * design where:
8641                          * a: "other guy" takes a lock that conflicts with a
8642                          * lock we want
8643                          * b: we attempt to take our lock (non-blocking) and
8644                          * the attempt fails.
8645                          * c: "other guy" releases the conflicting lock
8646                          * d: we ask what lock conflicts with the lock we want,
8647                          * getting F_UNLCK (no lock blocks us)
8648                          *
8649                          * If we retry the non-blocking lock attempt in this
8650                          * case (restart at step 'b') there's some possibility
8651                          * that many such attempts might fail.  However a test
8652                          * designed to actually provoke this race shows that
8653                          * the vast majority of cases require no retry, and
8654                          * only a few took as many as three retries.  Here's
8655                          * the test outcome:
8656                          *
8657                          *         number of retries    how many times we needed
8658                          *                              that many retries
8659                          *         0                    79461
8660                          *         1                      862
8661                          *         2                       49
8662                          *         3                        5
8663                          *
8664                          * Given those empirical results, we arbitrarily limit
8665                          * the retry count to ten.
8666                          *
8667                          * If we actually make to ten retries and give up,
8668                          * nothing catastrophic happens, but we're unable to
8669                          * return the information about the conflicting lock to
8670                          * the NFS client.  That's an acceptable trade off vs.
8671                          * letting this retry loop run forever.
8672                          */
8673                         if (flk.l_type == F_UNLCK) {
8674                                 if (spin_cnt++ < 10) {
8675                                         /* No longer locked, retry */
8676                                         goto retry;
8677                                 }
8678                         } else {
8679                                 *flock = flk;
8680                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8681                                     F_GETLK, &flk);
8682                         }
8683                 }
8684         }
8685 
8686         return (error);
8687 }
8688 
8689 /*ARGSUSED*/
8690 static nfsstat4
8691 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8692     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8693 {
8694         nfsstat4 status;
8695         rfs4_lockowner_t *lo = lsp->rls_locker;
8696         rfs4_state_t *sp = lsp->rls_state;
8697         struct flock64 flock;
8698         int16_t ltype;
8699         int flag;
8700         int error;
8701         sysid_t sysid;
8702         LOCK4res *lres;
8703         vnode_t *vp;
8704 
8705         if (rfs4_lease_expired(lo->rl_client)) {
8706                 return (NFS4ERR_EXPIRED);
8707         }
8708 
8709         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8710                 return (status);
8711 
8712         /* Check for zero length. To lock to end of file use all ones for V4 */
8713         if (length == 0)
8714                 return (NFS4ERR_INVAL);
8715         else if (length == (length4)(~0))
8716                 length = 0;             /* Posix to end of file  */
8717 
8718 retry:
8719         rfs4_dbe_lock(sp->rs_dbe);
8720         if (sp->rs_closed == TRUE) {
8721                 rfs4_dbe_unlock(sp->rs_dbe);
8722                 return (NFS4ERR_OLD_STATEID);
8723         }
8724 
8725         if (resop->resop != OP_LOCKU) {
8726                 switch (locktype) {
8727                 case READ_LT:
8728                 case READW_LT:
8729                         if ((sp->rs_share_access
8730                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8731                                 rfs4_dbe_unlock(sp->rs_dbe);
8732 
8733                                 return (NFS4ERR_OPENMODE);
8734                         }
8735                         ltype = F_RDLCK;
8736                         break;
8737                 case WRITE_LT:
8738                 case WRITEW_LT:
8739                         if ((sp->rs_share_access
8740                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8741                                 rfs4_dbe_unlock(sp->rs_dbe);
8742 
8743                                 return (NFS4ERR_OPENMODE);
8744                         }
8745                         ltype = F_WRLCK;
8746                         break;
8747                 }
8748         } else
8749                 ltype = F_UNLCK;
8750 
8751         flock.l_type = ltype;
8752         flock.l_whence = 0;             /* SEEK_SET */
8753         flock.l_start = offset;
8754         flock.l_len = length;
8755         flock.l_sysid = sysid;
8756         flock.l_pid = lsp->rls_locker->rl_pid;
8757 
8758         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8759         if (flock.l_len < 0 || flock.l_start < 0) {
8760                 rfs4_dbe_unlock(sp->rs_dbe);
8761                 return (NFS4ERR_INVAL);
8762         }
8763 
8764         /*
8765          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8766          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8767          */
8768         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8769 
8770         vp = sp->rs_finfo->rf_vp;
8771         VN_HOLD(vp);
8772 
8773         /*
8774          * We need to unlock sp before we call the underlying filesystem to
8775          * acquire the file lock.
8776          */
8777         rfs4_dbe_unlock(sp->rs_dbe);
8778 
8779         error = setlock(vp, &flock, flag, cred);
8780 
8781         /*
8782          * Make sure the file is still open.  In a case the file was closed in
8783          * the meantime, clean the lock we acquired using the setlock() call
8784          * above, and return the appropriate error.
8785          */
8786         rfs4_dbe_lock(sp->rs_dbe);
8787         if (sp->rs_closed == TRUE) {
8788                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8789                 rfs4_dbe_unlock(sp->rs_dbe);
8790 
8791                 VN_RELE(vp);
8792 
8793                 return (NFS4ERR_OLD_STATEID);
8794         }
8795         rfs4_dbe_unlock(sp->rs_dbe);
8796 
8797         VN_RELE(vp);
8798 
8799         if (error == 0) {
8800                 rfs4_dbe_lock(lsp->rls_dbe);
8801                 next_stateid(&lsp->rls_lockid);
8802                 rfs4_dbe_unlock(lsp->rls_dbe);
8803         }
8804 
8805         /*
8806          * N.B. We map error values to nfsv4 errors. This is differrent
8807          * than puterrno4 routine.
8808          */
8809         switch (error) {
8810         case 0:
8811                 status = NFS4_OK;
8812                 break;
8813         case EAGAIN:
8814         case EACCES:            /* Old value */
8815                 /* Can only get here if op is OP_LOCK */
8816                 ASSERT(resop->resop == OP_LOCK);
8817                 lres = &resop->nfs_resop4_u.oplock;
8818                 status = NFS4ERR_DENIED;
8819                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8820                     == NFS4ERR_EXPIRED)
8821                         goto retry;
8822                 break;
8823         case ENOLCK:
8824                 status = NFS4ERR_DELAY;
8825                 break;
8826         case EOVERFLOW:
8827                 status = NFS4ERR_INVAL;
8828                 break;
8829         case EINVAL:
8830                 status = NFS4ERR_NOTSUPP;
8831                 break;
8832         default:
8833                 status = NFS4ERR_SERVERFAULT;
8834                 break;
8835         }
8836 
8837         return (status);
8838 }
8839 
8840 /*ARGSUSED*/
8841 void
8842 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8843     struct svc_req *req, struct compound_state *cs)
8844 {
8845         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8846         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8847         nfsstat4 status;
8848         stateid4 *stateid;
8849         rfs4_lockowner_t *lo;
8850         rfs4_client_t *cp;
8851         rfs4_state_t *sp = NULL;
8852         rfs4_lo_state_t *lsp = NULL;
8853         bool_t ls_sw_held = FALSE;
8854         bool_t create = TRUE;
8855         bool_t lcreate = TRUE;
8856         bool_t dup_lock = FALSE;
8857         int rc;
8858 
8859         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8860             LOCK4args *, args);
8861 
8862         if (cs->vp == NULL) {
8863                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8864                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8865                     cs, LOCK4res *, resp);
8866                 return;
8867         }
8868 
8869         if (args->locker.new_lock_owner) {
8870                 /* Create a new lockowner for this instance */
8871                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8872 
8873                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8874 
8875                 stateid = &olo->open_stateid;
8876                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8877                 if (status != NFS4_OK) {
8878                         NFS4_DEBUG(rfs4_debug,
8879                             (CE_NOTE, "Get state failed in lock %d", status));
8880                         *cs->statusp = resp->status = status;
8881                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8882                             cs, LOCK4res *, resp);
8883                         return;
8884                 }
8885 
8886                 /* Ensure specified filehandle matches */
8887                 if (cs->vp != sp->rs_finfo->rf_vp) {
8888                         rfs4_state_rele(sp);
8889                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8890                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8891                             cs, LOCK4res *, resp);
8892                         return;
8893                 }
8894 
8895                 /* hold off other access to open_owner while we tinker */
8896                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8897 
8898                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8899                 case NFS4_CHECK_STATEID_OLD:
8900                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8901                         goto end;
8902                 case NFS4_CHECK_STATEID_BAD:
8903                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8904                         goto end;
8905                 case NFS4_CHECK_STATEID_EXPIRED:
8906                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8907                         goto end;
8908                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8909                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8910                         goto end;
8911                 case NFS4_CHECK_STATEID_CLOSED:
8912                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8913                         goto end;
8914                 case NFS4_CHECK_STATEID_OKAY:
8915                 case NFS4_CHECK_STATEID_REPLAY:
8916                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8917                             sp->rs_owner, resop)) {
8918                         case NFS4_CHKSEQ_OKAY:
8919                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8920                                         break;
8921                                 /*
8922                                  * This is replayed stateid; if seqid
8923                                  * matches next expected, then client
8924                                  * is using wrong seqid.
8925                                  */
8926                                 /* FALLTHROUGH */
8927                         case NFS4_CHKSEQ_BAD:
8928                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8929                                 goto end;
8930                         case NFS4_CHKSEQ_REPLAY:
8931                                 /* This is a duplicate LOCK request */
8932                                 dup_lock = TRUE;
8933 
8934                                 /*
8935                                  * For a duplicate we do not want to
8936                                  * create a new lockowner as it should
8937                                  * already exist.
8938                                  * Turn off the lockowner create flag.
8939                                  */
8940                                 lcreate = FALSE;
8941                         }
8942                         break;
8943                 }
8944 
8945                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8946                 if (lo == NULL) {
8947                         NFS4_DEBUG(rfs4_debug,
8948                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8949                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8950                         goto end;
8951                 }
8952 
8953                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8954                 if (lsp == NULL) {
8955                         rfs4_update_lease(sp->rs_owner->ro_client);
8956                         /*
8957                          * Only update theh open_seqid if this is not
8958                          * a duplicate request
8959                          */
8960                         if (dup_lock == FALSE) {
8961                                 rfs4_update_open_sequence(sp->rs_owner);
8962                         }
8963 
8964                         NFS4_DEBUG(rfs4_debug,
8965                             (CE_NOTE, "rfs4_op_lock: no state"));
8966                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8967                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8968                         rfs4_lockowner_rele(lo);
8969                         goto end;
8970                 }
8971 
8972                 /*
8973                  * This is the new_lock_owner branch and the client is
8974                  * supposed to be associating a new lock_owner with
8975                  * the open file at this point.  If we find that a
8976                  * lock_owner/state association already exists and a
8977                  * successful LOCK request was returned to the client,
8978                  * an error is returned to the client since this is
8979                  * not appropriate.  The client should be using the
8980                  * existing lock_owner branch.
8981                  */
8982                 if (dup_lock == FALSE && create == FALSE) {
8983                         if (lsp->rls_lock_completed == TRUE) {
8984                                 *cs->statusp =
8985                                     resp->status = NFS4ERR_BAD_SEQID;
8986                                 rfs4_lockowner_rele(lo);
8987                                 goto end;
8988                         }
8989                 }
8990 
8991                 rfs4_update_lease(sp->rs_owner->ro_client);
8992 
8993                 /*
8994                  * Only update theh open_seqid if this is not
8995                  * a duplicate request
8996                  */
8997                 if (dup_lock == FALSE) {
8998                         rfs4_update_open_sequence(sp->rs_owner);
8999                 }
9000 
9001                 /*
9002                  * If this is a duplicate lock request, just copy the
9003                  * previously saved reply and return.
9004                  */
9005                 if (dup_lock == TRUE) {
9006                         /* verify that lock_seqid's match */
9007                         if (lsp->rls_seqid != olo->lock_seqid) {
9008                                 NFS4_DEBUG(rfs4_debug,
9009                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9010                                     "lsp->seqid=%d old->seqid=%d",
9011                                     lsp->rls_seqid, olo->lock_seqid));
9012                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9013                         } else {
9014                                 rfs4_copy_reply(resop, &lsp->rls_reply);
9015                                 /*
9016                                  * Make sure to copy the just
9017                                  * retrieved reply status into the
9018                                  * overall compound status
9019                                  */
9020                                 *cs->statusp = resp->status;
9021                         }
9022                         rfs4_lockowner_rele(lo);
9023                         goto end;
9024                 }
9025 
9026                 rfs4_dbe_lock(lsp->rls_dbe);
9027 
9028                 /* Make sure to update the lock sequence id */
9029                 lsp->rls_seqid = olo->lock_seqid;
9030 
9031                 NFS4_DEBUG(rfs4_debug,
9032                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9033 
9034                 /*
9035                  * This is used to signify the newly created lockowner
9036                  * stateid and its sequence number.  The checks for
9037                  * sequence number and increment don't occur on the
9038                  * very first lock request for a lockowner.
9039                  */
9040                 lsp->rls_skip_seqid_check = TRUE;
9041 
9042                 /* hold off other access to lsp while we tinker */
9043                 rfs4_sw_enter(&lsp->rls_sw);
9044                 ls_sw_held = TRUE;
9045 
9046                 rfs4_dbe_unlock(lsp->rls_dbe);
9047 
9048                 rfs4_lockowner_rele(lo);
9049         } else {
9050                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9051                 /* get lsp and hold the lock on the underlying file struct */
9052                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9053                     != NFS4_OK) {
9054                         *cs->statusp = resp->status = status;
9055                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9056                             cs, LOCK4res *, resp);
9057                         return;
9058                 }
9059                 create = FALSE; /* We didn't create lsp */
9060 
9061                 /* Ensure specified filehandle matches */
9062                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9063                         rfs4_lo_state_rele(lsp, TRUE);
9064                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9065                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9066                             cs, LOCK4res *, resp);
9067                         return;
9068                 }
9069 
9070                 /* hold off other access to lsp while we tinker */
9071                 rfs4_sw_enter(&lsp->rls_sw);
9072                 ls_sw_held = TRUE;
9073 
9074                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9075                 /*
9076                  * The stateid looks like it was okay (expected to be
9077                  * the next one)
9078                  */
9079                 case NFS4_CHECK_STATEID_OKAY:
9080                         /*
9081                          * The sequence id is now checked.  Determine
9082                          * if this is a replay or if it is in the
9083                          * expected (next) sequence.  In the case of a
9084                          * replay, there are two replay conditions
9085                          * that may occur.  The first is the normal
9086                          * condition where a LOCK is done with a
9087                          * NFS4_OK response and the stateid is
9088                          * updated.  That case is handled below when
9089                          * the stateid is identified as a REPLAY.  The
9090                          * second is the case where an error is
9091                          * returned, like NFS4ERR_DENIED, and the
9092                          * sequence number is updated but the stateid
9093                          * is not updated.  This second case is dealt
9094                          * with here.  So it may seem odd that the
9095                          * stateid is okay but the sequence id is a
9096                          * replay but it is okay.
9097                          */
9098                         switch (rfs4_check_lock_seqid(
9099                             args->locker.locker4_u.lock_owner.lock_seqid,
9100                             lsp, resop)) {
9101                         case NFS4_CHKSEQ_REPLAY:
9102                                 if (resp->status != NFS4_OK) {
9103                                         /*
9104                                          * Here is our replay and need
9105                                          * to verify that the last
9106                                          * response was an error.
9107                                          */
9108                                         *cs->statusp = resp->status;
9109                                         goto end;
9110                                 }
9111                                 /*
9112                                  * This is done since the sequence id
9113                                  * looked like a replay but it didn't
9114                                  * pass our check so a BAD_SEQID is
9115                                  * returned as a result.
9116                                  */
9117                                 /*FALLTHROUGH*/
9118                         case NFS4_CHKSEQ_BAD:
9119                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9120                                 goto end;
9121                         case NFS4_CHKSEQ_OKAY:
9122                                 /* Everything looks okay move ahead */
9123                                 break;
9124                         }
9125                         break;
9126                 case NFS4_CHECK_STATEID_OLD:
9127                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9128                         goto end;
9129                 case NFS4_CHECK_STATEID_BAD:
9130                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9131                         goto end;
9132                 case NFS4_CHECK_STATEID_EXPIRED:
9133                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9134                         goto end;
9135                 case NFS4_CHECK_STATEID_CLOSED:
9136                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9137                         goto end;
9138                 case NFS4_CHECK_STATEID_REPLAY:
9139                         switch (rfs4_check_lock_seqid(
9140                             args->locker.locker4_u.lock_owner.lock_seqid,
9141                             lsp, resop)) {
9142                         case NFS4_CHKSEQ_OKAY:
9143                                 /*
9144                                  * This is a replayed stateid; if
9145                                  * seqid matches the next expected,
9146                                  * then client is using wrong seqid.
9147                                  */
9148                         case NFS4_CHKSEQ_BAD:
9149                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9150                                 goto end;
9151                         case NFS4_CHKSEQ_REPLAY:
9152                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9153                                 *cs->statusp = status = resp->status;
9154                                 goto end;
9155                         }
9156                         break;
9157                 default:
9158                         ASSERT(FALSE);
9159                         break;
9160                 }
9161 
9162                 rfs4_update_lock_sequence(lsp);
9163                 rfs4_update_lease(lsp->rls_locker->rl_client);
9164         }
9165 
9166         /*
9167          * NFS4 only allows locking on regular files, so
9168          * verify type of object.
9169          */
9170         if (cs->vp->v_type != VREG) {
9171                 if (cs->vp->v_type == VDIR)
9172                         status = NFS4ERR_ISDIR;
9173                 else
9174                         status = NFS4ERR_INVAL;
9175                 goto out;
9176         }
9177 
9178         cp = lsp->rls_state->rs_owner->ro_client;
9179 
9180         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9181                 status = NFS4ERR_GRACE;
9182                 goto out;
9183         }
9184 
9185         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9186                 status = NFS4ERR_NO_GRACE;
9187                 goto out;
9188         }
9189 
9190         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9191                 status = NFS4ERR_NO_GRACE;
9192                 goto out;
9193         }
9194 
9195         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9196                 cs->deleg = TRUE;
9197 
9198         status = rfs4_do_lock(lsp, args->locktype,
9199             args->offset, args->length, cs->cr, resop);
9200 
9201 out:
9202         lsp->rls_skip_seqid_check = FALSE;
9203 
9204         *cs->statusp = resp->status = status;
9205 
9206         if (status == NFS4_OK) {
9207                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9208                 lsp->rls_lock_completed = TRUE;
9209         }
9210         /*
9211          * Only update the "OPEN" response here if this was a new
9212          * lock_owner
9213          */
9214         if (sp)
9215                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9216 
9217         rfs4_update_lock_resp(lsp, resop);
9218 
9219 end:
9220         if (lsp) {
9221                 if (ls_sw_held)
9222                         rfs4_sw_exit(&lsp->rls_sw);
9223                 /*
9224                  * If an sp obtained, then the lsp does not represent
9225                  * a lock on the file struct.
9226                  */
9227                 if (sp != NULL)
9228                         rfs4_lo_state_rele(lsp, FALSE);
9229                 else
9230                         rfs4_lo_state_rele(lsp, TRUE);
9231         }
9232         if (sp) {
9233                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9234                 rfs4_state_rele(sp);
9235         }
9236 
9237         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9238             LOCK4res *, resp);
9239 }
9240 
9241 /* free function for LOCK/LOCKT */
9242 static void
9243 lock_denied_free(nfs_resop4 *resop)
9244 {
9245         LOCK4denied *dp = NULL;
9246 
9247         switch (resop->resop) {
9248         case OP_LOCK:
9249                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9250                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9251                 break;
9252         case OP_LOCKT:
9253                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9254                         dp = &resop->nfs_resop4_u.oplockt.denied;
9255                 break;
9256         default:
9257                 break;
9258         }
9259 
9260         if (dp)
9261                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9262 }
9263 
9264 /*ARGSUSED*/
9265 void
9266 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9267     struct svc_req *req, struct compound_state *cs)
9268 {
9269         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9270         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9271         nfsstat4 status;
9272         stateid4 *stateid = &args->lock_stateid;
9273         rfs4_lo_state_t *lsp;
9274 
9275         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9276             LOCKU4args *, args);
9277 
9278         if (cs->vp == NULL) {
9279                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9280                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9281                     LOCKU4res *, resp);
9282                 return;
9283         }
9284 
9285         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9286                 *cs->statusp = resp->status = status;
9287                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9288                     LOCKU4res *, resp);
9289                 return;
9290         }
9291 
9292         /* Ensure specified filehandle matches */
9293         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9294                 rfs4_lo_state_rele(lsp, TRUE);
9295                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9296                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9297                     LOCKU4res *, resp);
9298                 return;
9299         }
9300 
9301         /* hold off other access to lsp while we tinker */
9302         rfs4_sw_enter(&lsp->rls_sw);
9303 
9304         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9305         case NFS4_CHECK_STATEID_OKAY:
9306                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9307                     != NFS4_CHKSEQ_OKAY) {
9308                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9309                         goto end;
9310                 }
9311                 break;
9312         case NFS4_CHECK_STATEID_OLD:
9313                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9314                 goto end;
9315         case NFS4_CHECK_STATEID_BAD:
9316                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9317                 goto end;
9318         case NFS4_CHECK_STATEID_EXPIRED:
9319                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9320                 goto end;
9321         case NFS4_CHECK_STATEID_CLOSED:
9322                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9323                 goto end;
9324         case NFS4_CHECK_STATEID_REPLAY:
9325                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9326                 case NFS4_CHKSEQ_OKAY:
9327                                 /*
9328                                  * This is a replayed stateid; if
9329                                  * seqid matches the next expected,
9330                                  * then client is using wrong seqid.
9331                                  */
9332                 case NFS4_CHKSEQ_BAD:
9333                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9334                         goto end;
9335                 case NFS4_CHKSEQ_REPLAY:
9336                         rfs4_update_lease(lsp->rls_locker->rl_client);
9337                         *cs->statusp = status = resp->status;
9338                         goto end;
9339                 }
9340                 break;
9341         default:
9342                 ASSERT(FALSE);
9343                 break;
9344         }
9345 
9346         rfs4_update_lock_sequence(lsp);
9347         rfs4_update_lease(lsp->rls_locker->rl_client);
9348 
9349         /*
9350          * NFS4 only allows locking on regular files, so
9351          * verify type of object.
9352          */
9353         if (cs->vp->v_type != VREG) {
9354                 if (cs->vp->v_type == VDIR)
9355                         status = NFS4ERR_ISDIR;
9356                 else
9357                         status = NFS4ERR_INVAL;
9358                 goto out;
9359         }
9360 
9361         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9362                 status = NFS4ERR_GRACE;
9363                 goto out;
9364         }
9365 
9366         status = rfs4_do_lock(lsp, args->locktype,
9367             args->offset, args->length, cs->cr, resop);
9368 
9369 out:
9370         *cs->statusp = resp->status = status;
9371 
9372         if (status == NFS4_OK)
9373                 resp->lock_stateid = lsp->rls_lockid.stateid;
9374 
9375         rfs4_update_lock_resp(lsp, resop);
9376 
9377 end:
9378         rfs4_sw_exit(&lsp->rls_sw);
9379         rfs4_lo_state_rele(lsp, TRUE);
9380 
9381         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9382             LOCKU4res *, resp);
9383 }
9384 
9385 /*
9386  * LOCKT is a best effort routine, the client can not be guaranteed that
9387  * the status return is still in effect by the time the reply is received.
9388  * They are numerous race conditions in this routine, but we are not required
9389  * and can not be accurate.
9390  */
9391 /*ARGSUSED*/
9392 void
9393 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9394     struct svc_req *req, struct compound_state *cs)
9395 {
9396         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9397         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9398         rfs4_lockowner_t *lo;
9399         rfs4_client_t *cp;
9400         bool_t create = FALSE;
9401         struct flock64 flk;
9402         int error;
9403         int flag = FREAD | FWRITE;
9404         int ltype;
9405         length4 posix_length;
9406         sysid_t sysid;
9407         pid_t pid;
9408 
9409         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9410             LOCKT4args *, args);
9411 
9412         if (cs->vp == NULL) {
9413                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9414                 goto out;
9415         }
9416 
9417         /*
9418          * NFS4 only allows locking on regular files, so
9419          * verify type of object.
9420          */
9421         if (cs->vp->v_type != VREG) {
9422                 if (cs->vp->v_type == VDIR)
9423                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9424                 else
9425                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9426                 goto out;
9427         }
9428 
9429         /*
9430          * Check out the clientid to ensure the server knows about it
9431          * so that we correctly inform the client of a server reboot.
9432          */
9433         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9434             == NULL) {
9435                 *cs->statusp = resp->status =
9436                     rfs4_check_clientid(&args->owner.clientid, 0);
9437                 goto out;
9438         }
9439         if (rfs4_lease_expired(cp)) {
9440                 rfs4_client_close(cp);
9441                 /*
9442                  * Protocol doesn't allow returning NFS4ERR_STALE as
9443                  * other operations do on this check so STALE_CLIENTID
9444                  * is returned instead
9445                  */
9446                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9447                 goto out;
9448         }
9449 
9450         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9451                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9452                 rfs4_client_rele(cp);
9453                 goto out;
9454         }
9455         rfs4_client_rele(cp);
9456 
9457         resp->status = NFS4_OK;
9458 
9459         switch (args->locktype) {
9460         case READ_LT:
9461         case READW_LT:
9462                 ltype = F_RDLCK;
9463                 break;
9464         case WRITE_LT:
9465         case WRITEW_LT:
9466                 ltype = F_WRLCK;
9467                 break;
9468         }
9469 
9470         posix_length = args->length;
9471         /* Check for zero length. To lock to end of file use all ones for V4 */
9472         if (posix_length == 0) {
9473                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9474                 goto out;
9475         } else if (posix_length == (length4)(~0)) {
9476                 posix_length = 0;       /* Posix to end of file  */
9477         }
9478 
9479         /* Find or create a lockowner */
9480         lo = rfs4_findlockowner(&args->owner, &create);
9481 
9482         if (lo) {
9483                 pid = lo->rl_pid;
9484                 if ((resp->status =
9485                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9486                         goto err;
9487         } else {
9488                 pid = 0;
9489                 sysid = lockt_sysid;
9490         }
9491 retry:
9492         flk.l_type = ltype;
9493         flk.l_whence = 0;               /* SEEK_SET */
9494         flk.l_start = args->offset;
9495         flk.l_len = posix_length;
9496         flk.l_sysid = sysid;
9497         flk.l_pid = pid;
9498         flag |= F_REMOTELOCK;
9499 
9500         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9501 
9502         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9503         if (flk.l_len < 0 || flk.l_start < 0) {
9504                 resp->status = NFS4ERR_INVAL;
9505                 goto err;
9506         }
9507         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9508             NULL, cs->cr, NULL);
9509 
9510         /*
9511          * N.B. We map error values to nfsv4 errors. This is differrent
9512          * than puterrno4 routine.
9513          */
9514         switch (error) {
9515         case 0:
9516                 if (flk.l_type == F_UNLCK)
9517                         resp->status = NFS4_OK;
9518                 else {
9519                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9520                                 goto retry;
9521                         resp->status = NFS4ERR_DENIED;
9522                 }
9523                 break;
9524         case EOVERFLOW:
9525                 resp->status = NFS4ERR_INVAL;
9526                 break;
9527         case EINVAL:
9528                 resp->status = NFS4ERR_NOTSUPP;
9529                 break;
9530         default:
9531                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9532                     error);
9533                 resp->status = NFS4ERR_SERVERFAULT;
9534                 break;
9535         }
9536 
9537 err:
9538         if (lo)
9539                 rfs4_lockowner_rele(lo);
9540         *cs->statusp = resp->status;
9541 out:
9542         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9543             LOCKT4res *, resp);
9544 }
9545 
9546 int
9547 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9548 {
9549         int err;
9550         int cmd;
9551         vnode_t *vp;
9552         struct shrlock shr;
9553         struct shr_locowner shr_loco;
9554         int fflags = 0;
9555 
9556         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9557         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9558 
9559         if (sp->rs_closed)
9560                 return (NFS4ERR_OLD_STATEID);
9561 
9562         vp = sp->rs_finfo->rf_vp;
9563         ASSERT(vp);
9564 
9565         shr.s_access = shr.s_deny = 0;
9566 
9567         if (access & OPEN4_SHARE_ACCESS_READ) {
9568                 fflags |= FREAD;
9569                 shr.s_access |= F_RDACC;
9570         }
9571         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9572                 fflags |= FWRITE;
9573                 shr.s_access |= F_WRACC;
9574         }
9575         ASSERT(shr.s_access);
9576 
9577         if (deny & OPEN4_SHARE_DENY_READ)
9578                 shr.s_deny |= F_RDDNY;
9579         if (deny & OPEN4_SHARE_DENY_WRITE)
9580                 shr.s_deny |= F_WRDNY;
9581 
9582         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9583         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9584         shr_loco.sl_pid = shr.s_pid;
9585         shr_loco.sl_id = shr.s_sysid;
9586         shr.s_owner = (caddr_t)&shr_loco;
9587         shr.s_own_len = sizeof (shr_loco);
9588 
9589         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9590 
9591         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9592         if (err != 0) {
9593                 if (err == EAGAIN)
9594                         err = NFS4ERR_SHARE_DENIED;
9595                 else
9596                         err = puterrno4(err);
9597                 return (err);
9598         }
9599 
9600         sp->rs_share_access |= access;
9601         sp->rs_share_deny |= deny;
9602 
9603         return (0);
9604 }
9605 
9606 int
9607 rfs4_unshare(rfs4_state_t *sp)
9608 {
9609         int err;
9610         struct shrlock shr;
9611         struct shr_locowner shr_loco;
9612 
9613         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9614 
9615         if (sp->rs_closed || sp->rs_share_access == 0)
9616                 return (0);
9617 
9618         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9619         ASSERT(sp->rs_finfo->rf_vp);
9620 
9621         shr.s_access = shr.s_deny = 0;
9622         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9623         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9624         shr_loco.sl_pid = shr.s_pid;
9625         shr_loco.sl_id = shr.s_sysid;
9626         shr.s_owner = (caddr_t)&shr_loco;
9627         shr.s_own_len = sizeof (shr_loco);
9628 
9629         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9630             NULL);
9631         if (err != 0) {
9632                 err = puterrno4(err);
9633                 return (err);
9634         }
9635 
9636         sp->rs_share_access = 0;
9637         sp->rs_share_deny = 0;
9638 
9639         return (0);
9640 
9641 }
9642 
9643 static int
9644 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9645 {
9646         struct clist    *wcl;
9647         count4          count = rok->data_len;
9648         int             wlist_len;
9649 
9650         wcl = args->wlist;
9651         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9652                 return (FALSE);
9653         }
9654         wcl = args->wlist;
9655         rok->wlist_len = wlist_len;
9656         rok->wlist = wcl;
9657         return (TRUE);
9658 }
9659 
/*
 * Tunable to disable server referrals.  When set to a non-zero value,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9662 
9663 /*
9664  * Find an NFS record in reparse point data.
9665  * Returns 0 for success and <0 or an errno value on failure.
9666  */
9667 int
9668 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9669 {
9670         int err;
9671         char *stype, *val;
9672         nvlist_t *nvl;
9673         nvpair_t *curr;
9674 
9675         if ((nvl = reparse_init()) == NULL)
9676                 return (-1);
9677 
9678         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9679                 reparse_free(nvl);
9680                 return (err);
9681         }
9682 
9683         curr = NULL;
9684         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9685                 if ((stype = nvpair_name(curr)) == NULL) {
9686                         reparse_free(nvl);
9687                         return (-2);
9688                 }
9689                 if (strncasecmp(stype, "NFS", 3) == 0)
9690                         break;
9691         }
9692 
9693         if ((curr == NULL) ||
9694             (nvpair_value_string(curr, &val))) {
9695                 reparse_free(nvl);
9696                 return (-3);
9697         }
9698         *nvlp = nvl;
9699         *svcp = stype;
9700         *datap = val;
9701         return (0);
9702 }
9703 
9704 int
9705 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9706 {
9707         nvlist_t *nvl;
9708         char *s, *d;
9709 
9710         if (rfs4_no_referrals != 0)
9711                 return (B_FALSE);
9712 
9713         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9714                 return (B_FALSE);
9715 
9716         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9717                 return (B_FALSE);
9718 
9719         reparse_free(nvl);
9720 
9721         return (B_TRUE);
9722 }
9723 
9724 /*
9725  * There is a user-level copy of this routine in ref_subr.c.
9726  * Changes should be kept in sync.
9727  */
9728 static int
9729 nfs4_create_components(char *path, component4 *comp4)
9730 {
9731         int slen, plen, ncomp;
9732         char *ori_path, *nxtc, buf[MAXNAMELEN];
9733 
9734         if (path == NULL)
9735                 return (0);
9736 
9737         plen = strlen(path) + 1;        /* include the terminator */
9738         ori_path = path;
9739         ncomp = 0;
9740 
9741         /* count number of components in the path */
9742         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9743                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9744                         if ((slen = nxtc - path) == 0) {
9745                                 path = nxtc + 1;
9746                                 continue;
9747                         }
9748 
9749                         if (comp4 != NULL) {
9750                                 bcopy(path, buf, slen);
9751                                 buf[slen] = '\0';
9752                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9753                         }
9754 
9755                         ncomp++;        /* 1 valid component */
9756                         path = nxtc + 1;
9757                 }
9758                 if (*nxtc == '\0' || *nxtc == '\n')
9759                         break;
9760         }
9761 
9762         return (ncomp);
9763 }
9764 
9765 /*
9766  * There is a user-level copy of this routine in ref_subr.c.
9767  * Changes should be kept in sync.
9768  */
9769 static int
9770 make_pathname4(char *path, pathname4 *pathname)
9771 {
9772         int ncomp;
9773         component4 *comp4;
9774 
9775         if (pathname == NULL)
9776                 return (0);
9777 
9778         if (path == NULL) {
9779                 pathname->pathname4_val = NULL;
9780                 pathname->pathname4_len = 0;
9781                 return (0);
9782         }
9783 
9784         /* count number of components to alloc buffer */
9785         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9786                 pathname->pathname4_val = NULL;
9787                 pathname->pathname4_len = 0;
9788                 return (0);
9789         }
9790         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9791 
9792         /* copy components into allocated buffer */
9793         ncomp = nfs4_create_components(path, comp4);
9794 
9795         pathname->pathname4_val = comp4;
9796         pathname->pathname4_len = ncomp;
9797 
9798         return (ncomp);
9799 }
9800 
/* fs_locations4 is encoded/decoded with the fattr4 fs_locations XDR routine */
#define xdr_fs_locations4 xdr_fattr4_fs_locations
9802 
9803 fs_locations4 *
9804 fetch_referral(vnode_t *vp, cred_t *cr)
9805 {
9806         nvlist_t *nvl;
9807         char *stype, *sdata;
9808         fs_locations4 *result;
9809         char buf[1024];
9810         size_t bufsize;
9811         XDR xdr;
9812         int err;
9813 
9814         /*
9815          * Check attrs to ensure it's a reparse point
9816          */
9817         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9818                 return (NULL);
9819 
9820         /*
9821          * Look for an NFS record and get the type and data
9822          */
9823         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9824                 return (NULL);
9825 
9826         /*
9827          * With the type and data, upcall to get the referral
9828          */
9829         bufsize = sizeof (buf);
9830         bzero(buf, sizeof (buf));
9831         err = reparse_kderef((const char *)stype, (const char *)sdata,
9832             buf, &bufsize);
9833         reparse_free(nvl);
9834 
9835         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9836             char *, stype, char *, sdata, char *, buf, int, err);
9837         if (err) {
9838                 cmn_err(CE_NOTE,
9839                     "reparsed daemon not running: unable to get referral (%d)",
9840                     err);
9841                 return (NULL);
9842         }
9843 
9844         /*
9845          * We get an XDR'ed record back from the kderef call
9846          */
9847         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9848         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9849         err = xdr_fs_locations4(&xdr, result);
9850         XDR_DESTROY(&xdr);
9851         if (err != TRUE) {
9852                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9853                     int, err);
9854                 return (NULL);
9855         }
9856 
9857         /*
9858          * Look at path to recover fs_root, ignoring the leading '/'
9859          */
9860         (void) make_pathname4(vp->v_path, &result->fs_root);
9861 
9862         return (result);
9863 }
9864 
9865 char *
9866 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9867 {
9868         fs_locations4 *fsl;
9869         fs_location4 *fs;
9870         char *server, *path, *symbuf;
9871         static char *prefix = "/net/";
9872         int i, size, npaths;
9873         uint_t len;
9874 
9875         /* Get the referral */
9876         if ((fsl = fetch_referral(vp, cr)) == NULL)
9877                 return (NULL);
9878 
9879         /* Deal with only the first location and first server */
9880         fs = &fsl->locations_val[0];
9881         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9882         if (server == NULL) {
9883                 rfs4_free_fs_locations4(fsl);
9884                 kmem_free(fsl, sizeof (fs_locations4));
9885                 return (NULL);
9886         }
9887 
9888         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9889         size = strlen(prefix) + len;
9890         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9891                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9892 
9893         /* Allocate the symlink buffer and fill it */
9894         symbuf = kmem_zalloc(size, KM_SLEEP);
9895         (void) strcat(symbuf, prefix);
9896         (void) strcat(symbuf, server);
9897         kmem_free(server, len);
9898 
9899         npaths = 0;
9900         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9901                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9902                 if (path == NULL)
9903                         continue;
9904                 (void) strcat(symbuf, "/");
9905                 (void) strcat(symbuf, path);
9906                 npaths++;
9907                 kmem_free(path, len);
9908         }
9909 
9910         rfs4_free_fs_locations4(fsl);
9911         kmem_free(fsl, sizeof (fs_locations4));
9912 
9913         if (strsz != NULL)
9914                 *strsz = size;
9915         return (symbuf);
9916 }
9917 
9918 /*
9919  * Check to see if we have a downrev Solaris client, so that we
9920  * can send it a symlink instead of a referral.
9921  */
9922 int
9923 client_is_downrev(struct svc_req *req)
9924 {
9925         struct sockaddr *ca;
9926         rfs4_clntip_t *ci;
9927         bool_t create = FALSE;
9928         int is_downrev;
9929 
9930         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9931         ASSERT(ca);
9932         ci = rfs4_find_clntip(ca, &create);
9933         if (ci == NULL)
9934                 return (0);
9935         is_downrev = ci->ri_no_referrals;
9936         rfs4_dbe_rele(ci->ri_dbe);
9937         return (is_downrev);
9938 }
9939 
/*
 * Do the main work of handling HA-NFSv4 Resource Group failover on
 * Sun Cluster.
 * We need to detect whether any RG admin paths have been added or removed,
 * and adjust resources accordingly.
 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
 * order to scale, the list and array of paths need to be held in more
 * suitable data structures.
 *
 * Inputs:
 *   nsrv4                 - server state; nsrv4->dss_pathlist is the circular
 *                           "currently-serving" list that is walked and
 *                           pruned here.
 *   rfs4_dss_newpaths[]   - the "passed-in" array of paths (global), with
 *   rfs4_dss_numnewpaths    its element count (global).
 *
 * Effects:
 *   - Every currently-served path (other than NFS4_DSS_VAR_DIR) that is not
 *     in rfs4_dss_newpaths is unlinked from the list, its servinst slot is
 *     cleared, and it is freed.
 *   - If any passed-in path is not matched by a currently-served path, a
 *     new server instance is created for those paths, their stable storage
 *     state is read, and all grace periods are reset.
 */
static void
hanfsv4_failover(nfs4_srv_t *nsrv4)
{
        int i, start_grace, numadded_paths = 0;
        char **added_paths = NULL;
        rfs4_dss_path_t *dss_path;

        /*
         * Note: currently, dss_pathlist cannot be NULL, since
         * it will always include an entry for NFS4_DSS_VAR_DIR. If we
         * make the latter dynamically specified too, the following will
         * need to be adjusted.
         */

        /*
         * First, look for removed paths: RGs that have been failed-over
         * away from this node.
         * Walk the "currently-serving" dss_pathlist and, for each
         * path, check if it is on the "passed-in" rfs4_dss_newpaths array
         * from nfsd. If not, that RG path has been removed.
         *
         * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
         * any duplicates.
         *
         * NOTE(review): the walk terminates by comparing against
         * nsrv4->dss_pathlist, so it relies on the list head itself never
         * being one of the removed entries -- presumably the head is the
         * NFS4_DSS_VAR_DIR entry; confirm.
         */
        dss_path = nsrv4->dss_pathlist;
        do {
                int found = 0;
                char *path = dss_path->path;

                /* used only for non-HA so may not be removed */
                if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
                        dss_path = dss_path->next;
                        /* "continue" goes to the do/while condition */
                        continue;
                }

                for (i = 0; i < rfs4_dss_numnewpaths; i++) {
                        int cmpret;
                        char *newpath = rfs4_dss_newpaths[i];

                        /*
                         * Since nfsd has sorted rfs4_dss_newpaths for us,
                         * once the return from strcmp is negative we know
                         * we've passed the point where "path" should be,
                         * and can stop searching: "path" has been removed.
                         */
                        cmpret = strcmp(path, newpath);
                        if (cmpret < 0)
                                break;
                        if (cmpret == 0) {
                                found = 1;
                                break;
                        }
                }

                if (found == 0) {
                        unsigned index = dss_path->index;
                        rfs4_servinst_t *sip = dss_path->sip;
                        /* remember the successor before the entry is freed */
                        rfs4_dss_path_t *path_next = dss_path->next;

                        /*
                         * This path has been removed.
                         * We must clear out the servinst reference to
                         * it, since it's now owned by another
                         * node: we should not attempt to touch it.
                         */
                        ASSERT(dss_path == sip->dss_paths[index]);
                        sip->dss_paths[index] = NULL;

                        /* remove from "currently-serving" list, and destroy */
                        remque(dss_path);
                        /* allow for NUL */
                        kmem_free(dss_path->path, strlen(dss_path->path) + 1);
                        kmem_free(dss_path, sizeof (rfs4_dss_path_t));

                        dss_path = path_next;
                } else {
                        /* path was found; not removed */
                        dss_path = dss_path->next;
                }
        } while (dss_path != nsrv4->dss_pathlist);

        /*
         * Now, look for added paths: RGs that have been failed-over
         * to this node.
         * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
         * for each path, check if it is on the "currently-serving"
         * dss_pathlist. If not, that RG path has been added.
         *
         * Note: we don't do duplicate detection here; nfsd does that for us.
         *
         * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
         * an upper bound for the size needed for added_paths[numadded_paths].
         */

        /* probably more space than we need, but guaranteed to be enough */
        if (rfs4_dss_numnewpaths > 0) {
                size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
                added_paths = kmem_zalloc(sz, KM_SLEEP);
        }

        /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
        for (i = 0; i < rfs4_dss_numnewpaths; i++) {
                int found = 0;
                char *newpath = rfs4_dss_newpaths[i];

                dss_path = nsrv4->dss_pathlist;
                do {
                        char *path = dss_path->path;

                        /* used only for non-HA */
                        if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
                                dss_path = dss_path->next;
                                continue;
                        }

                        /*
                         * NOTE(review): this is a prefix match limited to
                         * strlen(path), whereas the removal pass above
                         * uses an exact strcmp().  A new path that merely
                         * starts with a currently-served path is treated
                         * as already present -- confirm this is intended.
                         */
                        if (strncmp(path, newpath, strlen(path)) == 0) {
                                found = 1;
                                break;
                        }

                        dss_path = dss_path->next;
                } while (dss_path != nsrv4->dss_pathlist);

                if (found == 0) {
                        /* entries are borrowed from rfs4_dss_newpaths */
                        added_paths[numadded_paths] = newpath;
                        numadded_paths++;
                }
        }

        /* did we find any added paths? */
        if (numadded_paths > 0) {

                /* create a new server instance, and start its grace period */
                start_grace = 1;
                /* CSTYLED */
                rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);

                /* read in the stable storage state from these paths */
                rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);

                /*
                 * Multiple failovers during a grace period will cause
                 * clients of the same resource group to be partitioned
                 * into different server instances, with different
                 * grace periods.  Since clients of the same resource
                 * group must be subject to the same grace period,
                 * we need to reset all currently active grace periods.
                 */
                rfs4_grace_reset_all(nsrv4);
        }

        /* only the pointer array is freed; the strings are not ours */
        if (rfs4_dss_numnewpaths > 0)
                kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
}