1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30 
  31 /*
  32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33  * Copyright 2019 Nexenta Systems, Inc.
  34  * Copyright 2019 Nexenta by DDN, Inc.
  35  */
  36 
  37 #include <sys/param.h>
  38 #include <sys/types.h>
  39 #include <sys/systm.h>
  40 #include <sys/cred.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 #include <sys/vnode.h>
  45 #include <sys/uio.h>
  46 #include <sys/errno.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/statvfs.h>
  49 #include <sys/kmem.h>
  50 #include <sys/dirent.h>
  51 #include <sys/cmn_err.h>
  52 #include <sys/debug.h>
  53 #include <sys/systeminfo.h>
  54 #include <sys/flock.h>
  55 #include <sys/pathname.h>
  56 #include <sys/nbmlock.h>
  57 #include <sys/share.h>
  58 #include <sys/atomic.h>
  59 #include <sys/policy.h>
  60 #include <sys/fem.h>
  61 #include <sys/sdt.h>
  62 #include <sys/ddi.h>
  63 #include <sys/zone.h>
  64 
  65 #include <fs/fs_reparse.h>
  66 
  67 #include <rpc/types.h>
  68 #include <rpc/auth.h>
  69 #include <rpc/rpcsec_gss.h>
  70 #include <rpc/svc.h>
  71 
  72 #include <nfs/nfs.h>
  73 #include <nfs/nfssys.h>
  74 #include <nfs/export.h>
  75 #include <nfs/nfs_cmd.h>
  76 #include <nfs/lm.h>
  77 #include <nfs/nfs4.h>
  78 #include <nfs/nfs4_drc.h>
  79 
  80 #include <sys/strsubr.h>
  81 #include <sys/strsun.h>
  82 
  83 #include <inet/common.h>
  84 #include <inet/ip.h>
  85 #include <inet/ip6.h>
  86 
  87 #include <sys/tsol/label.h>
  88 #include <sys/tsol/tndb.h>
  89 
/*
 * Lock-retry knobs: each compile-time default below is paired with a
 * file-local variable initialised from it.  The two extern declarations
 * refer to objects defined outside this file.
 */
#define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define RFS4_LOCK_DELAY 10      /* Milliseconds */
static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */
  97 
  98 static int rdma_setup_read_data4(READ4args *, READ4res *);
  99 
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * Pre-increments (sp)->bits.chgseq in place and evaluates to the new value.
 * The argument appears exactly once in the expansion, so the macro itself
 * does not evaluate it twice.
 */
#define next_stateid(sp) (++(sp)->bits.chgseq)
 104 
 105 /*
 106  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  *      maxcount that isn't large enough to hold the smallest possible
 109  *      XDR encoded dirent.
 110  *
 111  *          sizeof cookie (8 bytes) +
 112  *          sizeof name_len (4 bytes) +
 113  *          sizeof smallest (padded) name (4 bytes) +
 114  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  *          sizeof attrlist4_len (4 bytes) +
 116  *          sizeof next boolean (4 bytes)
 117  *
 118  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  * the smallest possible entry4 (assumes no attrs requested).
 120  *      sizeof nfsstat4 (4 bytes) +
 121  *      sizeof verifier4 (8 bytes) +
 122  *      sizeof entry4list bool (4 bytes) +
 123  *      sizeof entry4   (36 bytes) +
 124  *      sizeof eof bool  (4 bytes)
 125  *
 126  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  *      required for a given name length.  MAXNAMELEN is the maximum
 130  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  *      to hold ., .., and the largest possible solaris dirent64.
 134  */
 135 #define RFS4_MINLEN_ENTRY4 36
 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137 #define RFS4_MINLEN_RDDIR_BUF \
 138         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139 
 140 /*
 141  * It would be better to pad to 4 bytes since that's what XDR would do,
 142  * but the dirents UFS gives us are already padded to 8, so just take
 143  * what we're given.  Dircount is only a hint anyway.  Currently the
 144  * solaris kernel is ASCII only, so there's no point in calling the
 145  * UTF8 functions.
 146  *
 * dirent64: name padded to provide 8 byte struct alignment
 148  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  *
 150  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  *
 152  */
 153 #define DIRENT64_TO_DIRCOUNT(dp) \
 154         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155 
 156 
 157 static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 158 
 159 u_longlong_t    nfs4_srv_caller_id;
 160 uint_t          nfs4_srv_vkey = 0;
 161 
 162 void    rfs4_init_compound_state(struct compound_state *);
 163 
 164 static void     nullfree(caddr_t);
 165 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                     struct compound_state *);
 167 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                     struct compound_state *);
 169 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170                     struct compound_state *);
 171 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172                     struct compound_state *);
 173 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 174                     struct compound_state *);
 175 static void     rfs4_op_create_free(nfs_resop4 *resop);
 176 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 177                     struct svc_req *, struct compound_state *);
 178 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 179                     struct svc_req *, struct compound_state *);
 180 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181                     struct compound_state *);
 182 static void     rfs4_op_getattr_free(nfs_resop4 *);
 183 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 184                     struct compound_state *);
 185 static void     rfs4_op_getfh_free(nfs_resop4 *);
 186 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187                     struct compound_state *);
 188 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189                     struct compound_state *);
 190 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 191                     struct compound_state *);
 192 static void     lock_denied_free(nfs_resop4 *);
 193 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194                     struct compound_state *);
 195 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196                     struct compound_state *);
 197 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198                     struct compound_state *);
 199 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 200                     struct compound_state *);
 201 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 202                     struct svc_req *req, struct compound_state *cs);
 203 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204                     struct compound_state *);
 205 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 206                     struct compound_state *);
 207 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 208                     struct svc_req *, struct compound_state *);
 209 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 210                     struct svc_req *, struct compound_state *);
 211 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212                     struct compound_state *);
 213 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214                     struct compound_state *);
 215 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216                     struct compound_state *);
 217 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 218                     struct compound_state *);
 219 static void     rfs4_op_read_free(nfs_resop4 *);
 220 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 221 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 222                     struct compound_state *);
 223 static void     rfs4_op_readlink_free(nfs_resop4 *);
 224 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 225                     struct svc_req *, struct compound_state *);
 226 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227                     struct compound_state *);
 228 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229                     struct compound_state *);
 230 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                     struct compound_state *);
 232 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                     struct compound_state *);
 234 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                     struct compound_state *);
 236 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237                     struct compound_state *);
 238 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239                     struct compound_state *);
 240 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 241                     struct compound_state *);
 242 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 243                     struct svc_req *, struct compound_state *);
 244 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 245                     struct svc_req *req, struct compound_state *);
 246 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 247                     struct compound_state *);
 248 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 249 
 250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 251                     struct svc_req *);
 252 nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 253 void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 254 
 255 
/*
 * translation table for attrs
 *
 * NOTE(review): the code that fills and walks this table is outside this
 * part of the file; the per-field notes are inferred from the names and
 * types only and should be confirmed against the users.
 */
struct nfs4_ntov_table {
        union nfs4_attr_u *na;  /* presumably the attr value storage */
        uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably slot -> attr index */
        int attrcnt;            /* presumably number of slots in use */
        bool_t vfsstat;         /* presumably: VFS statistics were fetched */
};
 265 
 266 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 267 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 268                     struct nfs4_svgetit_arg *sargp);
 269 
 270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 271                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 272                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 273 
 274 static void     hanfsv4_failover(nfs4_srv_t *);
 275 
 276 fem_t           *deleg_rdops;
 277 fem_t           *deleg_wrops;
 278 
 279 /*
 280  * NFS4 op dispatch table
 281  */
 282 
 283 struct rfsv4disp {
 284         void    (*dis_proc)();          /* proc to call */
 285         void    (*dis_resfree)();       /* frees space allocated by proc */
 286         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 287 };
 288 
 289 static struct rfsv4disp rfsv4disptab[] = {
 290         /*
 291          * NFS VERSION 4
 292          */
 293 
 294         /* RFS_NULL = 0 */
 295         {rfs4_op_illegal, nullfree, 0},
 296 
 297         /* UNUSED = 1 */
 298         {rfs4_op_illegal, nullfree, 0},
 299 
 300         /* UNUSED = 2 */
 301         {rfs4_op_illegal, nullfree, 0},
 302 
 303         /* OP_ACCESS = 3 */
 304         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 305 
 306         /* OP_CLOSE = 4 */
 307         {rfs4_op_close, nullfree, 0},
 308 
 309         /* OP_COMMIT = 5 */
 310         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 311 
 312         /* OP_CREATE = 6 */
 313         {rfs4_op_create, nullfree, 0},
 314 
 315         /* OP_DELEGPURGE = 7 */
 316         {rfs4_op_delegpurge, nullfree, 0},
 317 
 318         /* OP_DELEGRETURN = 8 */
 319         {rfs4_op_delegreturn, nullfree, 0},
 320 
 321         /* OP_GETATTR = 9 */
 322         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 323 
 324         /* OP_GETFH = 10 */
 325         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 326 
 327         /* OP_LINK = 11 */
 328         {rfs4_op_link, nullfree, 0},
 329 
 330         /* OP_LOCK = 12 */
 331         {rfs4_op_lock, lock_denied_free, 0},
 332 
 333         /* OP_LOCKT = 13 */
 334         {rfs4_op_lockt, lock_denied_free, 0},
 335 
 336         /* OP_LOCKU = 14 */
 337         {rfs4_op_locku, nullfree, 0},
 338 
 339         /* OP_LOOKUP = 15 */
 340         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 341 
 342         /* OP_LOOKUPP = 16 */
 343         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 344 
 345         /* OP_NVERIFY = 17 */
 346         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 347 
 348         /* OP_OPEN = 18 */
 349         {rfs4_op_open, rfs4_free_reply, 0},
 350 
 351         /* OP_OPENATTR = 19 */
 352         {rfs4_op_openattr, nullfree, 0},
 353 
 354         /* OP_OPEN_CONFIRM = 20 */
 355         {rfs4_op_open_confirm, nullfree, 0},
 356 
 357         /* OP_OPEN_DOWNGRADE = 21 */
 358         {rfs4_op_open_downgrade, nullfree, 0},
 359 
 360         /* OP_OPEN_PUTFH = 22 */
 361         {rfs4_op_putfh, nullfree, RPC_ALL},
 362 
 363         /* OP_PUTPUBFH = 23 */
 364         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 365 
 366         /* OP_PUTROOTFH = 24 */
 367         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 368 
 369         /* OP_READ = 25 */
 370         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 371 
 372         /* OP_READDIR = 26 */
 373         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 374 
 375         /* OP_READLINK = 27 */
 376         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 377 
 378         /* OP_REMOVE = 28 */
 379         {rfs4_op_remove, nullfree, 0},
 380 
 381         /* OP_RENAME = 29 */
 382         {rfs4_op_rename, nullfree, 0},
 383 
 384         /* OP_RENEW = 30 */
 385         {rfs4_op_renew, nullfree, 0},
 386 
 387         /* OP_RESTOREFH = 31 */
 388         {rfs4_op_restorefh, nullfree, RPC_ALL},
 389 
 390         /* OP_SAVEFH = 32 */
 391         {rfs4_op_savefh, nullfree, RPC_ALL},
 392 
 393         /* OP_SECINFO = 33 */
 394         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 395 
 396         /* OP_SETATTR = 34 */
 397         {rfs4_op_setattr, nullfree, 0},
 398 
 399         /* OP_SETCLIENTID = 35 */
 400         {rfs4_op_setclientid, nullfree, 0},
 401 
 402         /* OP_SETCLIENTID_CONFIRM = 36 */
 403         {rfs4_op_setclientid_confirm, nullfree, 0},
 404 
 405         /* OP_VERIFY = 37 */
 406         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 407 
 408         /* OP_WRITE = 38 */
 409         {rfs4_op_write, nullfree, 0},
 410 
 411         /* OP_RELEASE_LOCKOWNER = 39 */
 412         {rfs4_op_release_lockowner, nullfree, 0},
 413 };
 414 
 415 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 416 
 417 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 418 
 419 #ifdef DEBUG
 420 
 421 int             rfs4_fillone_debug = 0;
 422 int             rfs4_no_stub_access = 1;
 423 int             rfs4_rddir_debug = 0;
 424 
 425 static char    *rfs4_op_string[] = {
 426         "rfs4_op_null",
 427         "rfs4_op_1 unused",
 428         "rfs4_op_2 unused",
 429         "rfs4_op_access",
 430         "rfs4_op_close",
 431         "rfs4_op_commit",
 432         "rfs4_op_create",
 433         "rfs4_op_delegpurge",
 434         "rfs4_op_delegreturn",
 435         "rfs4_op_getattr",
 436         "rfs4_op_getfh",
 437         "rfs4_op_link",
 438         "rfs4_op_lock",
 439         "rfs4_op_lockt",
 440         "rfs4_op_locku",
 441         "rfs4_op_lookup",
 442         "rfs4_op_lookupp",
 443         "rfs4_op_nverify",
 444         "rfs4_op_open",
 445         "rfs4_op_openattr",
 446         "rfs4_op_open_confirm",
 447         "rfs4_op_open_downgrade",
 448         "rfs4_op_putfh",
 449         "rfs4_op_putpubfh",
 450         "rfs4_op_putrootfh",
 451         "rfs4_op_read",
 452         "rfs4_op_readdir",
 453         "rfs4_op_readlink",
 454         "rfs4_op_remove",
 455         "rfs4_op_rename",
 456         "rfs4_op_renew",
 457         "rfs4_op_restorefh",
 458         "rfs4_op_savefh",
 459         "rfs4_op_secinfo",
 460         "rfs4_op_setattr",
 461         "rfs4_op_setclientid",
 462         "rfs4_op_setclient_confirm",
 463         "rfs4_op_verify",
 464         "rfs4_op_write",
 465         "rfs4_op_release_lockowner",
 466         "rfs4_op_illegal"
 467 };
 468 #endif
 469 
 470 void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 471 
 472 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 473 
 474 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 475 
 476 #ifdef  nextdp
 477 #undef nextdp
 478 #endif
 479 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 480 
 481 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 482         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 483         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 484         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 485         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 486         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 487         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 488         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 489         NULL,                   NULL
 490 };
 491 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 492         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 493         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 494         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 495         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 496         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 497         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 498         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 499         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 500         NULL,                   NULL
 501 };
 502 
 503 nfs4_srv_t *
 504 nfs4_get_srv(void)
 505 {
 506         nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
 507         nfs4_srv_t *srv = ng->nfs4_srv;
 508         ASSERT(srv != NULL);
 509         return (srv);
 510 }
 511 
 512 void
 513 rfs4_srv_zone_init(nfs_globals_t *ng)
 514 {
 515         nfs4_srv_t *nsrv4;
 516         timespec32_t verf;
 517 
 518         nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 519 
 520         /*
 521          * The following algorithm attempts to find a unique verifier
 522          * to be used as the write verifier returned from the server
 523          * to the client.  It is important that this verifier change
 524          * whenever the server reboots.  Of secondary importance, it
 525          * is important for the verifier to be unique between two
 526          * different servers.
 527          *
 528          * Thus, an attempt is made to use the system hostid and the
 529          * current time in seconds when the nfssrv kernel module is
 530          * loaded.  It is assumed that an NFS server will not be able
 531          * to boot and then to reboot in less than a second.  If the
 532          * hostid has not been set, then the current high resolution
 533          * time is used.  This will ensure different verifiers each
 534          * time the server reboots and minimize the chances that two
 535          * different servers will have the same verifier.
 536          * XXX - this is broken on LP64 kernels.
 537          */
 538         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 539         if (verf.tv_sec != 0) {
 540                 verf.tv_nsec = gethrestime_sec();
 541         } else {
 542                 timespec_t tverf;
 543 
 544                 gethrestime(&tverf);
 545                 verf.tv_sec = (time_t)tverf.tv_sec;
 546                 verf.tv_nsec = tverf.tv_nsec;
 547         }
 548         nsrv4->write4verf = *(uint64_t *)&verf;
 549 
 550         /* Used to manage create/destroy of server state */
 551         nsrv4->nfs4_server_state = NULL;
 552         nsrv4->nfs4_cur_servinst = NULL;
 553         nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 554         mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 555         mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 556         mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 557         rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 558 
 559         ng->nfs4_srv = nsrv4;
 560 }
 561 
 562 void
 563 rfs4_srv_zone_fini(nfs_globals_t *ng)
 564 {
 565         nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 566 
 567         ng->nfs4_srv = NULL;
 568 
 569         mutex_destroy(&nsrv4->deleg_lock);
 570         mutex_destroy(&nsrv4->state_lock);
 571         mutex_destroy(&nsrv4->servinst_lock);
 572         rw_destroy(&nsrv4->deleg_policy_lock);
 573 
 574         kmem_free(nsrv4, sizeof (*nsrv4));
 575 }
 576 
 577 void
 578 rfs4_srvrinit(void)
 579 {
 580         extern void rfs4_attr_init();
 581 
 582         rfs4_attr_init();
 583 
 584         if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 585                 rfs4_disable_delegation();
 586         } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 587             &deleg_wrops) != 0) {
 588                 rfs4_disable_delegation();
 589                 fem_free(deleg_rdops);
 590         }
 591 
 592         nfs4_srv_caller_id = fs_new_caller_id();
 593         lockt_sysid = lm_alloc_sysidt();
 594         vsd_create(&nfs4_srv_vkey, NULL);
 595         rfs4_state_g_init();
 596 }
 597 
 598 void
 599 rfs4_srvrfini(void)
 600 {
 601         if (lockt_sysid != LM_NOSYSID) {
 602                 lm_free_sysidt(lockt_sysid);
 603                 lockt_sysid = LM_NOSYSID;
 604         }
 605 
 606         rfs4_state_g_fini();
 607 
 608         fem_free(deleg_rdops);
 609         fem_free(deleg_wrops);
 610 }
 611 
 612 void
 613 rfs4_do_server_start(int server_upordown,
 614     int srv_delegation, int cluster_booted)
 615 {
 616         nfs4_srv_t *nsrv4 = nfs4_get_srv();
 617 
 618         /* Is this a warm start? */
 619         if (server_upordown == NFS_SERVER_QUIESCED) {
 620                 cmn_err(CE_NOTE, "nfs4_srv: "
 621                     "server was previously quiesced; "
 622                     "existing NFSv4 state will be re-used");
 623 
 624                 /*
 625                  * HA-NFSv4: this is also the signal
 626                  * that a Resource Group failover has
 627                  * occurred.
 628                  */
 629                 if (cluster_booted)
 630                         hanfsv4_failover(nsrv4);
 631         } else {
 632                 /* Cold start */
 633                 nsrv4->rfs4_start_time = 0;
 634                 rfs4_state_zone_init(nsrv4);
 635                 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 636                     nfs4_drc_hash);
 637 
 638                 /*
 639                  * The nfsd service was started with the -s option
 640                  * we need to pull in any state from the paths indicated.
 641                  */
 642                 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 643                         /* read in the stable storage state from these paths */
 644                         rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 645                             rfs4_dss_newpaths);
 646                 }
 647         }
 648 
 649         /* Check if delegation is to be enabled */
 650         if (srv_delegation != FALSE)
 651                 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 652 }
 653 
 654 void
 655 rfs4_init_compound_state(struct compound_state *cs)
 656 {
 657         bzero(cs, sizeof (*cs));
 658         cs->cont = TRUE;
 659         cs->access = CS_ACCESS_DENIED;
 660         cs->deleg = FALSE;
 661         cs->mandlock = FALSE;
 662         cs->fh.nfs_fh4_val = cs->fhbuf;
 663 }
 664 
 665 void
 666 rfs4_grace_start(rfs4_servinst_t *sip)
 667 {
 668         rw_enter(&sip->rwlock, RW_WRITER);
 669         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 670         sip->grace_period = rfs4_grace_period;
 671         rw_exit(&sip->rwlock);
 672 }
 673 
 674 /*
 675  * returns true if the instance's grace period has never been started
 676  */
 677 int
 678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 679 {
 680         time_t start_time;
 681 
 682         rw_enter(&sip->rwlock, RW_READER);
 683         start_time = sip->start_time;
 684         rw_exit(&sip->rwlock);
 685 
 686         return (start_time == 0);
 687 }
 688 
 689 /*
 690  * Indicates if server instance is within the
 691  * grace period.
 692  */
 693 int
 694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 695 {
 696         time_t grace_expiry;
 697 
 698         rw_enter(&sip->rwlock, RW_READER);
 699         grace_expiry = sip->start_time + sip->grace_period;
 700         rw_exit(&sip->rwlock);
 701 
 702         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 703 }
 704 
 705 int
 706 rfs4_clnt_in_grace(rfs4_client_t *cp)
 707 {
 708         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 709 
 710         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 711 }
 712 
 713 /*
 714  * reset all currently active grace periods
 715  */
 716 void
 717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 718 {
 719         rfs4_servinst_t *sip;
 720 
 721         mutex_enter(&nsrv4->servinst_lock);
 722         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 723                 if (rfs4_servinst_in_grace(sip))
 724                         rfs4_grace_start(sip);
 725         mutex_exit(&nsrv4->servinst_lock);
 726 }
 727 
 728 /*
 729  * start any new instances' grace periods
 730  */
 731 void
 732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 733 {
 734         rfs4_servinst_t *sip;
 735 
 736         mutex_enter(&nsrv4->servinst_lock);
 737         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 738                 if (rfs4_servinst_grace_new(sip))
 739                         rfs4_grace_start(sip);
 740         mutex_exit(&nsrv4->servinst_lock);
 741 }
 742 
 743 static rfs4_dss_path_t *
 744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 745     char *path, unsigned index)
 746 {
 747         size_t len;
 748         rfs4_dss_path_t *dss_path;
 749 
 750         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 751 
 752         /*
 753          * Take a copy of the string, since the original may be overwritten.
 754          * Sadly, no strdup() in the kernel.
 755          */
 756         /* allow for NUL */
 757         len = strlen(path) + 1;
 758         dss_path->path = kmem_alloc(len, KM_SLEEP);
 759         (void) strlcpy(dss_path->path, path, len);
 760 
 761         /* associate with servinst */
 762         dss_path->sip = sip;
 763         dss_path->index = index;
 764 
 765         /*
 766          * Add to list of served paths.
 767          * No locking required, as we're only ever called at startup.
 768          */
 769         if (nsrv4->dss_pathlist == NULL) {
 770                 /* this is the first dss_path_t */
 771 
 772                 /* needed for insque/remque */
 773                 dss_path->next = dss_path->prev = dss_path;
 774 
 775                 nsrv4->dss_pathlist = dss_path;
 776         } else {
 777                 insque(dss_path, nsrv4->dss_pathlist);
 778         }
 779 
 780         return (dss_path);
 781 }
 782 
 783 /*
 784  * Create a new server instance, and make it the currently active instance.
 785  * Note that starting the grace period too early will reduce the clients'
 786  * recovery window.
 787  */
 788 void
 789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 790     int dss_npaths, char **dss_paths)
 791 {
 792         unsigned i;
 793         rfs4_servinst_t *sip;
 794         rfs4_oldstate_t *oldstate;
 795 
 796         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 797         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 798 
 799         sip->start_time = (time_t)0;
 800         sip->grace_period = (time_t)0;
 801         sip->next = NULL;
 802         sip->prev = NULL;
 803 
 804         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 805         /*
 806          * This initial dummy entry is required to setup for insque/remque.
 807          * It must be skipped over whenever the list is traversed.
 808          */
 809         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 810         /* insque/remque require initial list entry to be self-terminated */
 811         oldstate->next = oldstate;
 812         oldstate->prev = oldstate;
 813         sip->oldstate = oldstate;
 814 
 815 
 816         sip->dss_npaths = dss_npaths;
 817         sip->dss_paths = kmem_alloc(dss_npaths *
 818             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 819 
 820         for (i = 0; i < dss_npaths; i++) {
 821                 sip->dss_paths[i] =
 822                     rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 823         }
 824 
 825         mutex_enter(&nsrv4->servinst_lock);
 826         if (nsrv4->nfs4_cur_servinst != NULL) {
 827                 /* add to linked list */
 828                 sip->prev = nsrv4->nfs4_cur_servinst;
 829                 nsrv4->nfs4_cur_servinst->next = sip;
 830         }
 831         if (start_grace)
 832                 rfs4_grace_start(sip);
 833         /* make the new instance "current" */
 834         nsrv4->nfs4_cur_servinst = sip;
 835 
 836         mutex_exit(&nsrv4->servinst_lock);
 837 }
 838 
 839 /*
 840  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 841  * all instances directly.
 842  */
 843 void
 844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 845 {
 846         rfs4_servinst_t *sip, *prev, *current;
 847 #ifdef DEBUG
 848         int n = 0;
 849 #endif
 850 
 851         mutex_enter(&nsrv4->servinst_lock);
 852         ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 853         current = nsrv4->nfs4_cur_servinst;
 854         nsrv4->nfs4_cur_servinst = NULL;
 855         for (sip = current; sip != NULL; sip = prev) {
 856                 prev = sip->prev;
 857                 rw_destroy(&sip->rwlock);
 858                 if (sip->oldstate)
 859                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 860                 if (sip->dss_paths) {
 861                         int i = sip->dss_npaths;
 862 
 863                         while (i > 0) {
 864                                 i--;
 865                                 if (sip->dss_paths[i] != NULL) {
 866                                         char *path = sip->dss_paths[i]->path;
 867 
 868                                         if (path != NULL) {
 869                                                 kmem_free(path,
 870                                                     strlen(path) + 1);
 871                                         }
 872                                         kmem_free(sip->dss_paths[i],
 873                                             sizeof (rfs4_dss_path_t));
 874                                 }
 875                         }
 876                         kmem_free(sip->dss_paths,
 877                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 878                 }
 879                 kmem_free(sip, sizeof (rfs4_servinst_t));
 880 #ifdef DEBUG
 881                 n++;
 882 #endif
 883         }
 884         mutex_exit(&nsrv4->servinst_lock);
 885 }
 886 
 887 /*
 888  * Assign the current server instance to a client_t.
 889  * Should be called with cp->rc_dbe held.
 890  */
 891 void
 892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 893     rfs4_servinst_t *sip)
 894 {
 895         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 896 
 897         /*
 898          * The lock ensures that if the current instance is in the process
 899          * of changing, we will see the new one.
 900          */
 901         mutex_enter(&nsrv4->servinst_lock);
 902         cp->rc_server_instance = sip;
 903         mutex_exit(&nsrv4->servinst_lock);
 904 }
 905 
 906 rfs4_servinst_t *
 907 rfs4_servinst(rfs4_client_t *cp)
 908 {
 909         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 910 
 911         return (cp->rc_server_instance);
 912 }
 913 
/*
 * Intentionally empty: performs no cleanup on resop.
 * NOTE(review): presumably used as the free routine for operations whose
 * results hold no allocations - confirm against the dispatch table.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
 919 
 920 /*
 921  * This is a fall-through for invalid or not implemented (yet) ops
 922  */
 923 /* ARGSUSED */
 924 static void
 925 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 926     struct compound_state *cs)
 927 {
 928         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 929 }
 930 
 931 /*
 932  * Check if the security flavor, nfsnum, is in the flavor_list.
 933  */
 934 bool_t
 935 in_flavor_list(int nfsnum, int *flavor_list, int count)
 936 {
 937         int i;
 938 
 939         for (i = 0; i < count; i++) {
 940                 if (nfsnum == flavor_list[i])
 941                         return (TRUE);
 942         }
 943         return (FALSE);
 944 }
 945 
 946 /*
 947  * Used by rfs4_op_secinfo to get the security information from the
 948  * export structure associated with the component.
 949  */
 950 /* ARGSUSED */
 951 static nfsstat4
 952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 953 {
 954         int error, different_export = 0;
 955         vnode_t *dvp, *vp;
 956         struct exportinfo *exi = NULL;
 957         fid_t fid;
 958         uint_t count, i;
 959         secinfo4 *resok_val;
 960         struct secinfo *secp;
 961         seconfig_t *si;
 962         bool_t did_traverse = FALSE;
 963         int dotdot, walk;
 964         nfs_export_t *ne = nfs_get_export();
 965 
 966         dvp = cs->vp;
 967         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 968 
 969         /*
 970          * If dotdotting, then need to check whether it's above the
 971          * root of a filesystem, or above an export point.
 972          */
 973         if (dotdot) {
 974 
 975                 /*
 976                  * If dotdotting at the root of a filesystem, then
 977                  * need to traverse back to the mounted-on filesystem
 978                  * and do the dotdot lookup there.
 979                  */
 980                 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
 981 
 982                         /*
 983                          * If at the system root, then can
 984                          * go up no further.
 985                          */
 986                         if (VN_CMP(dvp, ZONE_ROOTVP()))
 987                                 return (puterrno4(ENOENT));
 988 
 989                         /*
 990                          * Traverse back to the mounted-on filesystem
 991                          */
 992                         dvp = untraverse(cs->vp);
 993 
 994                         /*
 995                          * Set the different_export flag so we remember
 996                          * to pick up a new exportinfo entry for
 997                          * this new filesystem.
 998                          */
 999                         different_export = 1;
1000                 } else {
1001 
1002                         /*
1003                          * If dotdotting above an export point then set
1004                          * the different_export to get new export info.
1005                          */
1006                         different_export = nfs_exported(cs->exi, cs->vp);
1007                 }
1008         }
1009 
1010         /*
1011          * Get the vnode for the component "nm".
1012          */
1013         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1014             NULL, NULL, NULL);
1015         if (error)
1016                 return (puterrno4(error));
1017 
1018         /*
1019          * If the vnode is in a pseudo filesystem, or if the security flavor
1020          * used in the request is valid but not an explicitly shared flavor,
1021          * or the access bit indicates that this is a limited access,
1022          * check whether this vnode is visible.
1023          */
1024         if (!different_export &&
1025             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1026             cs->access & CS_ACCESS_LIMITED)) {
1027                 if (! nfs_visible(cs->exi, vp, &different_export)) {
1028                         VN_RELE(vp);
1029                         return (puterrno4(ENOENT));
1030                 }
1031         }
1032 
1033         /*
1034          * If it's a mountpoint, then traverse it.
1035          */
1036         if (vn_ismntpt(vp)) {
1037                 if ((error = traverse(&vp)) != 0) {
1038                         VN_RELE(vp);
1039                         return (puterrno4(error));
1040                 }
1041                 /* remember that we had to traverse mountpoint */
1042                 did_traverse = TRUE;
1043                 different_export = 1;
1044         } else if (vp->v_vfsp != dvp->v_vfsp) {
1045                 /*
1046                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1047                  * then vp is probably an LOFS object.  We don't need the
1048                  * realvp, we just need to know that we might have crossed
1049                  * a server fs boundary and need to call checkexport4.
1050                  * (LOFS lookup hides server fs mountpoints, and actually calls
1051                  * traverse)
1052                  */
1053                 different_export = 1;
1054         }
1055 
1056         /*
1057          * Get the export information for it.
1058          */
1059         if (different_export) {
1060 
1061                 bzero(&fid, sizeof (fid));
1062                 fid.fid_len = MAXFIDSZ;
1063                 error = vop_fid_pseudo(vp, &fid);
1064                 if (error) {
1065                         VN_RELE(vp);
1066                         return (puterrno4(error));
1067                 }
1068 
1069                 if (dotdot)
1070                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1071                 else
1072                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1073 
1074                 if (exi == NULL) {
1075                         if (did_traverse == TRUE) {
1076                                 /*
1077                                  * If this vnode is a mounted-on vnode,
1078                                  * but the mounted-on file system is not
1079                                  * exported, send back the secinfo for
1080                                  * the exported node that the mounted-on
1081                                  * vnode lives in.
1082                                  */
1083                                 exi = cs->exi;
1084                         } else {
1085                                 VN_RELE(vp);
1086                                 return (puterrno4(EACCES));
1087                         }
1088                 }
1089         } else {
1090                 exi = cs->exi;
1091         }
1092         ASSERT(exi != NULL);
1093 
1094 
1095         /*
1096          * Create the secinfo result based on the security information
1097          * from the exportinfo structure (exi).
1098          *
1099          * Return all flavors for a pseudo node.
1100          * For a real export node, return the flavor that the client
1101          * has access with.
1102          */
1103         ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1104         if (PSEUDO(exi)) {
1105                 count = exi->exi_export.ex_seccnt; /* total sec count */
1106                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1107                 secp = exi->exi_export.ex_secinfo;
1108 
1109                 for (i = 0; i < count; i++) {
1110                         si = &secp[i].s_secinfo;
1111                         resok_val[i].flavor = si->sc_rpcnum;
1112                         if (resok_val[i].flavor == RPCSEC_GSS) {
1113                                 rpcsec_gss_info *info;
1114 
1115                                 info = &resok_val[i].flavor_info;
1116                                 info->qop = si->sc_qop;
1117                                 info->service = (rpc_gss_svc_t)si->sc_service;
1118 
1119                                 /* get oid opaque data */
1120                                 info->oid.sec_oid4_len =
1121                                     si->sc_gss_mech_type->length;
1122                                 info->oid.sec_oid4_val = kmem_alloc(
1123                                     si->sc_gss_mech_type->length, KM_SLEEP);
1124                                 bcopy(
1125                                     si->sc_gss_mech_type->elements,
1126                                     info->oid.sec_oid4_val,
1127                                     info->oid.sec_oid4_len);
1128                         }
1129                 }
1130                 resp->SECINFO4resok_len = count;
1131                 resp->SECINFO4resok_val = resok_val;
1132         } else {
1133                 int ret_cnt = 0, k = 0;
1134                 int *flavor_list;
1135 
1136                 count = exi->exi_export.ex_seccnt; /* total sec count */
1137                 secp = exi->exi_export.ex_secinfo;
1138 
1139                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1140                 /* find out which flavors to return */
1141                 for (i = 0; i < count; i ++) {
1142                         int access, flavor, perm;
1143 
1144                         flavor = secp[i].s_secinfo.sc_nfsnum;
1145                         perm = secp[i].s_flags;
1146 
1147                         access = nfsauth4_secinfo_access(exi, cs->req,
1148                             flavor, perm, cs->basecr);
1149 
1150                         if (! (access & NFSAUTH_DENIED) &&
1151                             ! (access & NFSAUTH_WRONGSEC)) {
1152                                 flavor_list[ret_cnt] = flavor;
1153                                 ret_cnt++;
1154                         }
1155                 }
1156 
1157                 /* Create the returning SECINFO value */
1158                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1159 
1160                 for (i = 0; i < count; i++) {
1161                         /*
1162                          * If the flavor is in the flavor list,
1163                          * fill in resok_val.
1164                          */
1165                         si = &secp[i].s_secinfo;
1166                         if (in_flavor_list(si->sc_nfsnum,
1167                             flavor_list, ret_cnt)) {
1168                                 resok_val[k].flavor = si->sc_rpcnum;
1169                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1170                                         rpcsec_gss_info *info;
1171 
1172                                         info = &resok_val[k].flavor_info;
1173                                         info->qop = si->sc_qop;
1174                                         info->service = (rpc_gss_svc_t)
1175                                             si->sc_service;
1176 
1177                                         /* get oid opaque data */
1178                                         info->oid.sec_oid4_len =
1179                                             si->sc_gss_mech_type->length;
1180                                         info->oid.sec_oid4_val = kmem_alloc(
1181                                             si->sc_gss_mech_type->length,
1182                                             KM_SLEEP);
1183                                         bcopy(si->sc_gss_mech_type->elements,
1184                                             info->oid.sec_oid4_val,
1185                                             info->oid.sec_oid4_len);
1186                                 }
1187                                 k++;
1188                         }
1189                         if (k >= ret_cnt)
1190                                 break;
1191                 }
1192                 resp->SECINFO4resok_len = ret_cnt;
1193                 resp->SECINFO4resok_val = resok_val;
1194                 kmem_free(flavor_list, count * sizeof (int));
1195         }
1196 
1197         VN_RELE(vp);
1198         return (NFS4_OK);
1199 }
1200 
1201 /*
1202  * SECINFO (Operation 33): Obtain required security information on
1203  * the component name in the format of (security-mechanism-oid, qop, service)
1204  * triplets.
1205  */
1206 /* ARGSUSED */
1207 static void
1208 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1209     struct compound_state *cs)
1210 {
1211         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1212         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1213         utf8string *utfnm = &args->name;
1214         uint_t len;
1215         char *nm;
1216         struct sockaddr *ca;
1217         char *name = NULL;
1218         nfsstat4 status = NFS4_OK;
1219 
1220         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1221             SECINFO4args *, args);
1222 
1223         /*
1224          * Current file handle (cfh) should have been set before getting
1225          * into this function. If not, return error.
1226          */
1227         if (cs->vp == NULL) {
1228                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1229                 goto out;
1230         }
1231 
1232         if (cs->vp->v_type != VDIR) {
1233                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1234                 goto out;
1235         }
1236 
1237         /*
1238          * Verify the component name. If failed, error out, but
1239          * do not error out if the component name is a "..".
1240          * SECINFO will return its parents secinfo data for SECINFO "..".
1241          */
1242         status = utf8_dir_verify(utfnm);
1243         if (status != NFS4_OK) {
1244                 if (utfnm->utf8string_len != 2 ||
1245                     utfnm->utf8string_val[0] != '.' ||
1246                     utfnm->utf8string_val[1] != '.') {
1247                         *cs->statusp = resp->status = status;
1248                         goto out;
1249                 }
1250         }
1251 
1252         nm = utf8_to_str(utfnm, &len, NULL);
1253         if (nm == NULL) {
1254                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1255                 goto out;
1256         }
1257 
1258         if (len > MAXNAMELEN) {
1259                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1260                 kmem_free(nm, len);
1261                 goto out;
1262         }
1263 
1264         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1265         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1266             MAXPATHLEN  + 1);
1267 
1268         if (name == NULL) {
1269                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1270                 kmem_free(nm, len);
1271                 goto out;
1272         }
1273 
1274 
1275         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1276 
1277         if (name != nm)
1278                 kmem_free(name, MAXPATHLEN + 1);
1279         kmem_free(nm, len);
1280 
1281 out:
1282         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1283             SECINFO4res *, resp);
1284 }
1285 
1286 /*
1287  * Free SECINFO result.
1288  */
1289 /* ARGSUSED */
1290 static void
1291 rfs4_op_secinfo_free(nfs_resop4 *resop)
1292 {
1293         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1294         int count, i;
1295         secinfo4 *resok_val;
1296 
1297         /* If this is not an Ok result, nothing to free. */
1298         if (resp->status != NFS4_OK) {
1299                 return;
1300         }
1301 
1302         count = resp->SECINFO4resok_len;
1303         resok_val = resp->SECINFO4resok_val;
1304 
1305         for (i = 0; i < count; i++) {
1306                 if (resok_val[i].flavor == RPCSEC_GSS) {
1307                         rpcsec_gss_info *info;
1308 
1309                         info = &resok_val[i].flavor_info;
1310                         kmem_free(info->oid.sec_oid4_val,
1311                             info->oid.sec_oid4_len);
1312                 }
1313         }
1314         kmem_free(resok_val, count * sizeof (secinfo4));
1315         resp->SECINFO4resok_len = 0;
1316         resp->SECINFO4resok_val = NULL;
1317 }
1318 
1319 /* ARGSUSED */
1320 static void
1321 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1322     struct compound_state *cs)
1323 {
1324         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1325         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1326         int error;
1327         vnode_t *vp;
1328         struct vattr va;
1329         int checkwriteperm;
1330         cred_t *cr = cs->cr;
1331         bslabel_t *clabel, *slabel;
1332         ts_label_t *tslabel;
1333         boolean_t admin_low_client;
1334 
1335         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1336             ACCESS4args *, args);
1337 
1338 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1339         if (cs->access == CS_ACCESS_DENIED) {
1340                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1341                 goto out;
1342         }
1343 #endif
1344         if (cs->vp == NULL) {
1345                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1346                 goto out;
1347         }
1348 
1349         ASSERT(cr != NULL);
1350 
1351         vp = cs->vp;
1352 
1353         /*
1354          * If the file system is exported read only, it is not appropriate
1355          * to check write permissions for regular files and directories.
1356          * Special files are interpreted by the client, so the underlying
1357          * permissions are sent back to the client for interpretation.
1358          */
1359         if (rdonly4(req, cs) &&
1360             (vp->v_type == VREG || vp->v_type == VDIR))
1361                 checkwriteperm = 0;
1362         else
1363                 checkwriteperm = 1;
1364 
1365         /*
1366          * XXX
1367          * We need the mode so that we can correctly determine access
1368          * permissions relative to a mandatory lock file.  Access to
1369          * mandatory lock files is denied on the server, so it might
1370          * as well be reflected to the server during the open.
1371          */
1372         va.va_mask = AT_MODE;
1373         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1374         if (error) {
1375                 *cs->statusp = resp->status = puterrno4(error);
1376                 goto out;
1377         }
1378         resp->access = 0;
1379         resp->supported = 0;
1380 
1381         if (is_system_labeled()) {
1382                 ASSERT(req->rq_label != NULL);
1383                 clabel = req->rq_label;
1384                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1385                     "got client label from request(1)",
1386                     struct svc_req *, req);
1387                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1388                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1389                                 *cs->statusp = resp->status = puterrno4(EACCES);
1390                                 goto out;
1391                         }
1392                         slabel = label2bslabel(tslabel);
1393                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1394                             char *, "got server label(1) for vp(2)",
1395                             bslabel_t *, slabel, vnode_t *, vp);
1396 
1397                         admin_low_client = B_FALSE;
1398                 } else
1399                         admin_low_client = B_TRUE;
1400         }
1401 
1402         if (args->access & ACCESS4_READ) {
1403                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1404                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1405                     (!is_system_labeled() || admin_low_client ||
1406                     bldominates(clabel, slabel)))
1407                         resp->access |= ACCESS4_READ;
1408                 resp->supported |= ACCESS4_READ;
1409         }
1410         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1411                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1412                 if (!error && (!is_system_labeled() || admin_low_client ||
1413                     bldominates(clabel, slabel)))
1414                         resp->access |= ACCESS4_LOOKUP;
1415                 resp->supported |= ACCESS4_LOOKUP;
1416         }
1417         if (checkwriteperm &&
1418             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1419                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1420                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1421                     (!is_system_labeled() || admin_low_client ||
1422                     blequal(clabel, slabel)))
1423                         resp->access |=
1424                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1425                 resp->supported |=
1426                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1427         }
1428 
1429         if (checkwriteperm &&
1430             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1431                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1432                 if (!error && (!is_system_labeled() || admin_low_client ||
1433                     blequal(clabel, slabel)))
1434                         resp->access |= ACCESS4_DELETE;
1435                 resp->supported |= ACCESS4_DELETE;
1436         }
1437         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1438                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1439                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1440                     (!is_system_labeled() || admin_low_client ||
1441                     bldominates(clabel, slabel)))
1442                         resp->access |= ACCESS4_EXECUTE;
1443                 resp->supported |= ACCESS4_EXECUTE;
1444         }
1445 
1446         if (is_system_labeled() && !admin_low_client)
1447                 label_rele(tslabel);
1448 
1449         *cs->statusp = resp->status = NFS4_OK;
1450 out:
1451         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1452             ACCESS4res *, resp);
1453 }
1454 
1455 /* ARGSUSED */
1456 static void
1457 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1458     struct compound_state *cs)
1459 {
1460         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1461         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1462         int error;
1463         vnode_t *vp = cs->vp;
1464         cred_t *cr = cs->cr;
1465         vattr_t va;
1466         nfs4_srv_t *nsrv4;
1467 
1468         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1469             COMMIT4args *, args);
1470 
1471         if (vp == NULL) {
1472                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1473                 goto out;
1474         }
1475         if (cs->access == CS_ACCESS_DENIED) {
1476                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1477                 goto out;
1478         }
1479 
1480         if (args->offset + args->count < args->offset) {
1481                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1482                 goto out;
1483         }
1484 
1485         va.va_mask = AT_UID;
1486         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1487 
1488         /*
1489          * If we can't get the attributes, then we can't do the
1490          * right access checking.  So, we'll fail the request.
1491          */
1492         if (error) {
1493                 *cs->statusp = resp->status = puterrno4(error);
1494                 goto out;
1495         }
1496         if (rdonly4(req, cs)) {
1497                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1498                 goto out;
1499         }
1500 
1501         if (vp->v_type != VREG) {
1502                 if (vp->v_type == VDIR)
1503                         resp->status = NFS4ERR_ISDIR;
1504                 else
1505                         resp->status = NFS4ERR_INVAL;
1506                 *cs->statusp = resp->status;
1507                 goto out;
1508         }
1509 
1510         if (crgetuid(cr) != va.va_uid &&
1511             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1512                 *cs->statusp = resp->status = puterrno4(error);
1513                 goto out;
1514         }
1515 
1516         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1517 
1518         if (error) {
1519                 *cs->statusp = resp->status = puterrno4(error);
1520                 goto out;
1521         }
1522 
1523         nsrv4 = nfs4_get_srv();
1524         *cs->statusp = resp->status = NFS4_OK;
1525         resp->writeverf = nsrv4->write4verf;
1526 out:
1527         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1528             COMMIT4res *, resp);
1529 }
1530 
1531 /*
1532  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1533  * was completed. It does the nfsv4 create for special files.
1534  */
1535 /* ARGSUSED */
1536 static vnode_t *
1537 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1538     struct compound_state *cs, vattr_t *vap, char *nm)
1539 {
1540         int error;
1541         cred_t *cr = cs->cr;
1542         vnode_t *dvp = cs->vp;
1543         vnode_t *vp = NULL;
1544         int mode;
1545         enum vcexcl excl;
1546 
1547         switch (args->type) {
1548         case NF4CHR:
1549         case NF4BLK:
1550                 if (secpolicy_sys_devices(cr) != 0) {
1551                         *cs->statusp = resp->status = NFS4ERR_PERM;
1552                         return (NULL);
1553                 }
1554                 if (args->type == NF4CHR)
1555                         vap->va_type = VCHR;
1556                 else
1557                         vap->va_type = VBLK;
1558                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1559                     args->ftype4_u.devdata.specdata2);
1560                 vap->va_mask |= AT_RDEV;
1561                 break;
1562         case NF4SOCK:
1563                 vap->va_type = VSOCK;
1564                 break;
1565         case NF4FIFO:
1566                 vap->va_type = VFIFO;
1567                 break;
1568         default:
1569                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1570                 return (NULL);
1571         }
1572 
1573         /*
1574          * Must specify the mode.
1575          */
1576         if (!(vap->va_mask & AT_MODE)) {
1577                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1578                 return (NULL);
1579         }
1580 
1581         excl = EXCL;
1582 
1583         mode = 0;
1584 
1585         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1586         if (error) {
1587                 *cs->statusp = resp->status = puterrno4(error);
1588                 return (NULL);
1589         }
1590         return (vp);
1591 }
1592 
1593 /*
1594  * nfsv4 create is used to create non-regular files. For regular files,
1595  * use nfsv4 open.
1596  */
1597 /* ARGSUSED */
1598 static void
1599 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1600     struct compound_state *cs)
1601 {
1602         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1603         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1604         int error;
1605         struct vattr bva, iva, iva2, ava, *vap;
1606         cred_t *cr = cs->cr;
1607         vnode_t *dvp = cs->vp;
1608         vnode_t *vp = NULL;
1609         vnode_t *realvp;
1610         char *nm, *lnm;
1611         uint_t len, llen;
1612         int syncval = 0;
1613         struct nfs4_svgetit_arg sarg;
1614         struct nfs4_ntov_table ntov;
1615         struct statvfs64 sb;
1616         nfsstat4 status;
1617         struct sockaddr *ca;
1618         char *name = NULL;
1619         char *lname = NULL;
1620 
1621         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1622             CREATE4args *, args);
1623 
1624         resp->attrset = 0;
1625 
1626         if (dvp == NULL) {
1627                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1628                 goto out;
1629         }
1630 
1631         /*
1632          * If there is an unshared filesystem mounted on this vnode,
1633          * do not allow to create an object in this directory.
1634          */
1635         if (vn_ismntpt(dvp)) {
1636                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1637                 goto out;
1638         }
1639 
1640         /* Verify that type is correct */
1641         switch (args->type) {
1642         case NF4LNK:
1643         case NF4BLK:
1644         case NF4CHR:
1645         case NF4SOCK:
1646         case NF4FIFO:
1647         case NF4DIR:
1648                 break;
1649         default:
1650                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1651                 goto out;
1652         };
1653 
1654         if (cs->access == CS_ACCESS_DENIED) {
1655                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1656                 goto out;
1657         }
1658         if (dvp->v_type != VDIR) {
1659                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1660                 goto out;
1661         }
1662         status = utf8_dir_verify(&args->objname);
1663         if (status != NFS4_OK) {
1664                 *cs->statusp = resp->status = status;
1665                 goto out;
1666         }
1667 
1668         if (rdonly4(req, cs)) {
1669                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1670                 goto out;
1671         }
1672 
1673         /*
1674          * Name of newly created object
1675          */
1676         nm = utf8_to_fn(&args->objname, &len, NULL);
1677         if (nm == NULL) {
1678                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1679                 goto out;
1680         }
1681 
1682         if (len > MAXNAMELEN) {
1683                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1684                 kmem_free(nm, len);
1685                 goto out;
1686         }
1687 
1688         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1689         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1690             MAXPATHLEN  + 1);
1691 
1692         if (name == NULL) {
1693                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1694                 kmem_free(nm, len);
1695                 goto out;
1696         }
1697 
1698         resp->attrset = 0;
1699 
1700         sarg.sbp = &sb;
1701         sarg.is_referral = B_FALSE;
1702         nfs4_ntov_table_init(&ntov);
1703 
1704         status = do_rfs4_set_attrs(&resp->attrset,
1705             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1706 
1707         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1708                 status = NFS4ERR_INVAL;
1709 
1710         if (status != NFS4_OK) {
1711                 *cs->statusp = resp->status = status;
1712                 if (name != nm)
1713                         kmem_free(name, MAXPATHLEN + 1);
1714                 kmem_free(nm, len);
1715                 nfs4_ntov_table_free(&ntov, &sarg);
1716                 resp->attrset = 0;
1717                 goto out;
1718         }
1719 
1720         /* Get "before" change value */
1721         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1722         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1723         if (error) {
1724                 *cs->statusp = resp->status = puterrno4(error);
1725                 if (name != nm)
1726                         kmem_free(name, MAXPATHLEN + 1);
1727                 kmem_free(nm, len);
1728                 nfs4_ntov_table_free(&ntov, &sarg);
1729                 resp->attrset = 0;
1730                 goto out;
1731         }
1732         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1733 
1734         vap = sarg.vap;
1735 
1736         /*
1737          * Set the default initial values for attributes when the parent
1738          * directory does not have the VSUID/VSGID bit set and they have
1739          * not been specified in createattrs.
1740          */
1741         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1742                 vap->va_uid = crgetuid(cr);
1743                 vap->va_mask |= AT_UID;
1744         }
1745         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1746                 vap->va_gid = crgetgid(cr);
1747                 vap->va_mask |= AT_GID;
1748         }
1749 
1750         vap->va_mask |= AT_TYPE;
1751         switch (args->type) {
1752         case NF4DIR:
1753                 vap->va_type = VDIR;
1754                 if ((vap->va_mask & AT_MODE) == 0) {
1755                         vap->va_mode = 0700; /* default: owner rwx only */
1756                         vap->va_mask |= AT_MODE;
1757                 }
1758                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1759                 if (error)
1760                         break;
1761 
1762                 /*
1763                  * Get the initial "after" sequence number, if it fails,
1764                  * set to zero
1765                  */
1766                 iva.va_mask = AT_SEQ;
1767                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1768                         iva.va_seq = 0;
1769                 break;
1770         case NF4LNK:
1771                 vap->va_type = VLNK;
1772                 if ((vap->va_mask & AT_MODE) == 0) {
1773                         vap->va_mode = 0700; /* default: owner rwx only */
1774                         vap->va_mask |= AT_MODE;
1775                 }
1776 
1777                 /*
1778                  * symlink names must be treated as data
1779                  */
1780                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1781                     &llen, NULL);
1782 
1783                 if (lnm == NULL) {
1784                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1785                         if (name != nm)
1786                                 kmem_free(name, MAXPATHLEN + 1);
1787                         kmem_free(nm, len);
1788                         nfs4_ntov_table_free(&ntov, &sarg);
1789                         resp->attrset = 0;
1790                         goto out;
1791                 }
1792 
1793                 if (llen > MAXPATHLEN) {
1794                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1795                         if (name != nm)
1796                                 kmem_free(name, MAXPATHLEN + 1);
1797                         kmem_free(nm, len);
1798                         kmem_free(lnm, llen);
1799                         nfs4_ntov_table_free(&ntov, &sarg);
1800                         resp->attrset = 0;
1801                         goto out;
1802                 }
1803 
1804                 lname = nfscmd_convname(ca, cs->exi, lnm,
1805                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1806 
1807                 if (lname == NULL) {
1808                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1809                         if (name != nm)
1810                                 kmem_free(name, MAXPATHLEN + 1);
1811                         kmem_free(nm, len);
1812                         kmem_free(lnm, llen);
1813                         nfs4_ntov_table_free(&ntov, &sarg);
1814                         resp->attrset = 0;
1815                         goto out;
1816                 }
1817 
1818                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1819                 if (lname != lnm)
1820                         kmem_free(lname, MAXPATHLEN + 1);
1821                 kmem_free(lnm, llen);
1822                 if (error)
1823                         break;
1824 
1825                 /*
1826                  * Get the initial "after" sequence number, if it fails,
1827                  * set to zero
1828                  */
1829                 iva.va_mask = AT_SEQ;
1830                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1831                         iva.va_seq = 0;
1832 
1833                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1834                     NULL, NULL, NULL);
1835                 if (error)
1836                         break;
1837 
1838                 /*
1839                  * va_seq is not safe over VOP calls, check it again
1840                  * if it has changed zero out iva to force atomic = FALSE.
1841                  */
1842                 iva2.va_mask = AT_SEQ;
1843                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1844                     iva2.va_seq != iva.va_seq)
1845                         iva.va_seq = 0;
1846                 break;
1847         default:
1848                 /*
1849                  * probably a special file.
1850                  */
1851                 if ((vap->va_mask & AT_MODE) == 0) {
1852                         vap->va_mode = 0600; /* default: owner rw only */
1853                         vap->va_mask |= AT_MODE;
1854                 }
1855                 syncval = FNODSYNC;
1856                 /*
1857                  * We know this will only generate one VOP call
1858                  */
1859                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1860 
1861                 if (vp == NULL) {
1862                         if (name != nm)
1863                                 kmem_free(name, MAXPATHLEN + 1);
1864                         kmem_free(nm, len);
1865                         nfs4_ntov_table_free(&ntov, &sarg);
1866                         resp->attrset = 0;
1867                         goto out;
1868                 }
1869 
1870                 /*
1871                  * Get the initial "after" sequence number, if it fails,
1872                  * set to zero
1873                  */
1874                 iva.va_mask = AT_SEQ;
1875                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1876                         iva.va_seq = 0;
1877 
1878                 break;
1879         }
1880         if (name != nm)
1881                 kmem_free(name, MAXPATHLEN + 1);
1882         kmem_free(nm, len);
1883 
1884         if (error) {
1885                 *cs->statusp = resp->status = puterrno4(error);
1886         }
1887 
1888         /*
1889          * Force modified data and metadata out to stable storage.
1890          */
1891         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1892 
1893         if (resp->status != NFS4_OK) {
1894                 if (vp != NULL)
1895                         VN_RELE(vp);
1896                 nfs4_ntov_table_free(&ntov, &sarg);
1897                 resp->attrset = 0;
1898                 goto out;
1899         }
1900 
1901         /*
1902          * Finish setup of cinfo response, "before" value already set.
1903          * Get "after" change value, if it fails, simply return the
1904          * before value.
1905          */
1906         ava.va_mask = AT_CTIME|AT_SEQ;
1907         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1908                 ava.va_ctime = bva.va_ctime;
1909                 ava.va_seq = 0;
1910         }
1911         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1912 
1913         /*
1914          * True verification that object was created with correct
1915          * attrs is impossible.  The attrs could have been changed
1916          * immediately after object creation.  If attributes did
1917          * not verify, the only recourse for the server is to
1918          * destroy the object.  Maybe if some attrs (like gid)
1919          * are set incorrectly, the object should be destroyed;
1920          * however, seems bad as a default policy.  Do we really
1921          * want to destroy an object over one of the times not
1922          * verifying correctly?  For these reasons, the server
1923          * currently sets bits in attrset for createattrs
1924          * that were set; however, no verification is done.
1925          *
1926          * vmask_to_nmask accounts for vattr bits set on create
1927          *      [do_rfs4_set_attrs() only sets resp bits for
1928          *       non-vattr/vfs bits.]
1929          * Mask off any bits set by default so as not to return
1930          * more attrset bits than were requested in createattrs
1931          */
1932         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1933         resp->attrset &= args->createattrs.attrmask;
1934         nfs4_ntov_table_free(&ntov, &sarg);
1935 
1936         error = makefh4(&cs->fh, vp, cs->exi);
1937         if (error) {
1938                 *cs->statusp = resp->status = puterrno4(error);
1939         }
1940 
1941         /*
1942          * The cinfo.atomic = TRUE only if we got no errors, we have
1943          * non-zero va_seq's, and it has incremented by exactly one
1944          * during the creation and it didn't change during the VOP_LOOKUP
1945          * or VOP_FSYNC.
1946          */
1947         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1948             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1949                 resp->cinfo.atomic = TRUE;
1950         else
1951                 resp->cinfo.atomic = FALSE;
1952 
1953         /*
1954          * Force modified metadata out to stable storage.
1955          *
1956          * if a underlying vp exists, pass it to VOP_FSYNC
1957          */
1958         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1959                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1960         else
1961                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1962 
1963         if (resp->status != NFS4_OK) {
1964                 VN_RELE(vp);
1965                 goto out;
1966         }
1967         if (cs->vp)
1968                 VN_RELE(cs->vp);
1969 
1970         cs->vp = vp;
1971         *cs->statusp = resp->status = NFS4_OK;
1972 out:
1973         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1974             CREATE4res *, resp);
1975 }
1976 
1977 /*ARGSUSED*/
1978 static void
1979 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1980     struct compound_state *cs)
1981 {
1982         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1983             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1984 
1985         rfs4_op_inval(argop, resop, req, cs);
1986 
1987         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1988             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1989 }
1990 
1991 /*ARGSUSED*/
1992 static void
1993 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1994     struct compound_state *cs)
1995 {
1996         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1997         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1998         rfs4_deleg_state_t *dsp;
1999         nfsstat4 status;
2000 
2001         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2002             DELEGRETURN4args *, args);
2003 
2004         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2005         resp->status = *cs->statusp = status;
2006         if (status != NFS4_OK)
2007                 goto out;
2008 
2009         /* Ensure specified filehandle matches */
2010         if (cs->vp != dsp->rds_finfo->rf_vp) {
2011                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2012         } else
2013                 rfs4_return_deleg(dsp, FALSE);
2014 
2015         rfs4_update_lease(dsp->rds_client);
2016 
2017         rfs4_deleg_state_rele(dsp);
2018 out:
2019         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2020             DELEGRETURN4res *, resp);
2021 }
2022 
2023 /*
2024  * Check to see if a given "flavor" is an explicitly shared flavor.
2025  * The assumption of this routine is the "flavor" is already a valid
2026  * flavor in the secinfo list of "exi".
2027  *
2028  *      e.g.
2029  *              # share -o sec=flavor1 /export
2030  *              # share -o sec=flavor2 /export/home
2031  *
2032  *              flavor2 is not an explicitly shared flavor for /export,
2033  *              however it is in the secinfo list for /export thru the
2034  *              server namespace setup.
2035  */
2036 int
2037 is_exported_sec(int flavor, struct exportinfo *exi)
2038 {
2039         int     i;
2040         struct secinfo *sp;
2041 
2042         sp = exi->exi_export.ex_secinfo;
2043         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2044                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2045                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2046                         return (SEC_REF_EXPORTED(&sp[i]));
2047                 }
2048         }
2049 
2050         /* Should not reach this point based on the assumption */
2051         return (0);
2052 }
2053 
2054 /*
2055  * Check if the security flavor used in the request matches what is
2056  * required at the export point or at the root pseudo node (exi_root).
2057  *
2058  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2059  *
2060  */
2061 static int
2062 secinfo_match_or_authnone(struct compound_state *cs)
2063 {
2064         int     i;
2065         struct secinfo *sp;
2066 
2067         /*
2068          * Check cs->nfsflavor (from the request) against
2069          * the current export data in cs->exi.
2070          */
2071         sp = cs->exi->exi_export.ex_secinfo;
2072         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2073                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2074                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2075                         return (1);
2076         }
2077 
2078         return (0);
2079 }
2080 
2081 /*
2082  * Check the access authority for the client and return the correct error.
2083  */
2084 nfsstat4
2085 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2086 {
2087         int     authres;
2088 
2089         /*
2090          * First, check if the security flavor used in the request
2091          * are among the flavors set in the server namespace.
2092          */
2093         if (!secinfo_match_or_authnone(cs)) {
2094                 *cs->statusp = NFS4ERR_WRONGSEC;
2095                 return (*cs->statusp);
2096         }
2097 
2098         authres = checkauth4(cs, req);
2099 
2100         if (authres > 0) {
2101                 *cs->statusp = NFS4_OK;
2102                 if (! (cs->access & CS_ACCESS_LIMITED))
2103                         cs->access = CS_ACCESS_OK;
2104         } else if (authres == 0) {
2105                 *cs->statusp = NFS4ERR_ACCESS;
2106         } else if (authres == -2) {
2107                 *cs->statusp = NFS4ERR_WRONGSEC;
2108         } else {
2109                 *cs->statusp = NFS4ERR_DELAY;
2110         }
2111         return (*cs->statusp);
2112 }
2113 
2114 /*
2115  * bitmap4_to_attrmask is called by getattr and readdir.
2116  * It sets up the vattr mask and determines whether vfsstat call is needed
2117  * based on the input bitmap.
2118  * Returns nfsv4 status.
2119  */
2120 static nfsstat4
2121 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2122 {
2123         int i;
2124         uint_t  va_mask;
2125         struct statvfs64 *sbp = sargp->sbp;
2126 
2127         sargp->sbp = NULL;
2128         sargp->flag = 0;
2129         sargp->rdattr_error = NFS4_OK;
2130         sargp->mntdfid_set = FALSE;
2131         if (sargp->cs->vp)
2132                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2133                     FH4_ATTRDIR | FH4_NAMEDATTR);
2134         else
2135                 sargp->xattr = 0;
2136 
2137         /*
2138          * Set rdattr_error_req to true if return error per
2139          * failed entry rather than fail the readdir.
2140          */
2141         if (breq & FATTR4_RDATTR_ERROR_MASK)
2142                 sargp->rdattr_error_req = 1;
2143         else
2144                 sargp->rdattr_error_req = 0;
2145 
2146         /*
2147          * generate the va_mask
2148          * Handle the easy cases first
2149          */
2150         switch (breq) {
2151         case NFS4_NTOV_ATTR_MASK:
2152                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2153                 return (NFS4_OK);
2154 
2155         case NFS4_FS_ATTR_MASK:
2156                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2157                 sargp->sbp = sbp;
2158                 return (NFS4_OK);
2159 
2160         case NFS4_NTOV_ATTR_CACHE_MASK:
2161                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2162                 return (NFS4_OK);
2163 
2164         case FATTR4_LEASE_TIME_MASK:
2165                 sargp->vap->va_mask = 0;
2166                 return (NFS4_OK);
2167 
2168         default:
2169                 va_mask = 0;
2170                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2171                         if ((breq & nfs4_ntov_map[i].fbit) &&
2172                             nfs4_ntov_map[i].vbit)
2173                                 va_mask |= nfs4_ntov_map[i].vbit;
2174                 }
2175 
2176                 /*
2177                  * Check is vfsstat is needed
2178                  */
2179                 if (breq & NFS4_FS_ATTR_MASK)
2180                         sargp->sbp = sbp;
2181 
2182                 sargp->vap->va_mask = va_mask;
2183                 return (NFS4_OK);
2184         }
2185         /* NOTREACHED */
2186 }
2187 
2188 /*
2189  * bitmap4_get_sysattrs is called by getattr and readdir.
2190  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2191  * Returns nfsv4 status.
2192  */
2193 static nfsstat4
2194 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2195 {
2196         int error;
2197         struct compound_state *cs = sargp->cs;
2198         vnode_t *vp = cs->vp;
2199 
2200         if (sargp->sbp != NULL) {
2201                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2202                         sargp->sbp = NULL;   /* to identify error */
2203                         return (puterrno4(error));
2204                 }
2205         }
2206 
2207         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2208 }
2209 
2210 static void
2211 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2212 {
2213         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2214             KM_SLEEP);
2215         ntovp->attrcnt = 0;
2216         ntovp->vfsstat = FALSE;
2217 }
2218 
2219 static void
2220 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2221     struct nfs4_svgetit_arg *sargp)
2222 {
2223         int i;
2224         union nfs4_attr_u *na;
2225         uint8_t *amap;
2226 
2227         /*
2228          * XXX Should do the same checks for whether the bit is set
2229          */
2230         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2231             i < ntovp->attrcnt; i++, na++, amap++) {
2232                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2233                     NFS4ATTR_FREEIT, sargp, na);
2234         }
2235         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2236                 /*
2237                  * xdr_free for getattr will be done later
2238                  */
2239                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2240                     i < ntovp->attrcnt; i++, na++, amap++) {
2241                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2242                 }
2243         }
2244         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2245 }
2246 
2247 /*
2248  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2249  */
2250 static nfsstat4
2251 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2252     struct nfs4_svgetit_arg *sargp)
2253 {
2254         int error = 0;
2255         int i, k;
2256         struct nfs4_ntov_table ntov;
2257         XDR xdr;
2258         ulong_t xdr_size;
2259         char *xdr_attrs;
2260         nfsstat4 status = NFS4_OK;
2261         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2262         union nfs4_attr_u *na;
2263         uint8_t *amap;
2264 
2265         sargp->op = NFS4ATTR_GETIT;
2266         sargp->flag = 0;
2267 
2268         fattrp->attrmask = 0;
2269         /* if no bits requested, then return empty fattr4 */
2270         if (breq == 0) {
2271                 fattrp->attrlist4_len = 0;
2272                 fattrp->attrlist4 = NULL;
2273                 return (NFS4_OK);
2274         }
2275 
2276         /*
2277          * return NFS4ERR_INVAL when client requests write-only attrs
2278          */
2279         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2280                 return (NFS4ERR_INVAL);
2281 
2282         nfs4_ntov_table_init(&ntov);
2283         na = ntov.na;
2284         amap = ntov.amap;
2285 
2286         /*
2287          * Now loop to get or verify the attrs
2288          */
2289         for (i = 0; i < nfs4_ntov_map_size; i++) {
2290                 if (breq & nfs4_ntov_map[i].fbit) {
2291                         if ((*nfs4_ntov_map[i].sv_getit)(
2292                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2293 
2294                                 error = (*nfs4_ntov_map[i].sv_getit)(
2295                                     NFS4ATTR_GETIT, sargp, na);
2296 
2297                                 /*
2298                                  * Possible error values:
2299                                  * >0 if sv_getit failed to
2300                                  * get the attr; 0 if succeeded;
2301                                  * <0 if rdattr_error and the
2302                                  * attribute cannot be returned.
2303                                  */
2304                                 if (error && !(sargp->rdattr_error_req))
2305                                         goto done;
2306                                 /*
2307                                  * If error then just for entry
2308                                  */
2309                                 if (error == 0) {
2310                                         fattrp->attrmask |=
2311                                             nfs4_ntov_map[i].fbit;
2312                                         *amap++ =
2313                                             (uint8_t)nfs4_ntov_map[i].nval;
2314                                         na++;
2315                                         (ntov.attrcnt)++;
2316                                 } else if ((error > 0) &&
2317                                     (sargp->rdattr_error == NFS4_OK)) {
2318                                         sargp->rdattr_error = puterrno4(error);
2319                                 }
2320                                 error = 0;
2321                         }
2322                 }
2323         }
2324 
2325         /*
2326          * If rdattr_error was set after the return value for it was assigned,
2327          * update it.
2328          */
2329         if (prev_rdattr_error != sargp->rdattr_error) {
2330                 na = ntov.na;
2331                 amap = ntov.amap;
2332                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2333                         k = *amap;
2334                         if (k < FATTR4_RDATTR_ERROR) {
2335                                 continue;
2336                         }
2337                         if ((k == FATTR4_RDATTR_ERROR) &&
2338                             ((*nfs4_ntov_map[k].sv_getit)(
2339                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2340 
2341                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2342                                     NFS4ATTR_GETIT, sargp, na);
2343                         }
2344                         break;
2345                 }
2346         }
2347 
2348         xdr_size = 0;
2349         na = ntov.na;
2350         amap = ntov.amap;
2351         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2352                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2353         }
2354 
2355         fattrp->attrlist4_len = xdr_size;
2356         if (xdr_size) {
2357                 /* freed by rfs4_op_getattr_free() */
2358                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2359 
2360                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2361 
2362                 na = ntov.na;
2363                 amap = ntov.amap;
2364                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2365                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2366                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2367                                     int, *amap);
2368                                 status = NFS4ERR_SERVERFAULT;
2369                                 break;
2370                         }
2371                 }
2372                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2373         } else {
2374                 fattrp->attrlist4 = NULL;
2375         }
2376 done:
2377 
2378         nfs4_ntov_table_free(&ntov, sargp);
2379 
2380         if (error != 0)
2381                 status = puterrno4(error);
2382 
2383         return (status);
2384 }
2385 
2386 /* ARGSUSED */
2387 static void
2388 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2389     struct compound_state *cs)
2390 {
2391         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2392         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2393         struct nfs4_svgetit_arg sarg;
2394         struct statvfs64 sb;
2395         nfsstat4 status;
2396 
2397         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2398             GETATTR4args *, args);
2399 
2400         if (cs->vp == NULL) {
2401                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2402                 goto out;
2403         }
2404 
2405         if (cs->access == CS_ACCESS_DENIED) {
2406                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2407                 goto out;
2408         }
2409 
2410         sarg.sbp = &sb;
2411         sarg.cs = cs;
2412         sarg.is_referral = B_FALSE;
2413 
2414         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2415         if (status == NFS4_OK) {
2416 
2417                 status = bitmap4_get_sysattrs(&sarg);
2418                 if (status == NFS4_OK) {
2419 
2420                         /* Is this a referral? */
2421                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2422                                 /* Older V4 Solaris client sees a link */
2423                                 if (client_is_downrev(req))
2424                                         sarg.vap->va_type = VLNK;
2425                                 else
2426                                         sarg.is_referral = B_TRUE;
2427                         }
2428 
2429                         status = do_rfs4_op_getattr(args->attr_request,
2430                             &resp->obj_attributes, &sarg);
2431                 }
2432         }
2433         *cs->statusp = resp->status = status;
2434 out:
2435         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2436             GETATTR4res *, resp);
2437 }
2438 
2439 static void
2440 rfs4_op_getattr_free(nfs_resop4 *resop)
2441 {
2442         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2443 
2444         nfs4_fattr4_free(&resp->obj_attributes);
2445 }
2446 
2447 /* ARGSUSED */
2448 static void
2449 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2450     struct compound_state *cs)
2451 {
2452         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2453 
2454         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2455 
2456         if (cs->vp == NULL) {
2457                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2458                 goto out;
2459         }
2460         if (cs->access == CS_ACCESS_DENIED) {
2461                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2462                 goto out;
2463         }
2464 
2465         /* check for reparse point at the share point */
2466         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2467                 /* it's all bad */
2468                 cs->exi->exi_moved = 1;
2469                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2470                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2471                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2472                 return;
2473         }
2474 
2475         /* check for reparse point at vp */
2476         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2477                 /* it's not all bad */
2478                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2479                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2480                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2481                 return;
2482         }
2483 
2484         resp->object.nfs_fh4_val =
2485             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2486         nfs_fh4_copy(&cs->fh, &resp->object);
2487         *cs->statusp = resp->status = NFS4_OK;
2488 out:
2489         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2490             GETFH4res *, resp);
2491 }
2492 
2493 static void
2494 rfs4_op_getfh_free(nfs_resop4 *resop)
2495 {
2496         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2497 
2498         if (resp->status == NFS4_OK &&
2499             resp->object.nfs_fh4_val != NULL) {
2500                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2501                 resp->object.nfs_fh4_val = NULL;
2502                 resp->object.nfs_fh4_len = 0;
2503         }
2504 }
2505 
2506 /*
2507  * illegal: args: void
2508  *          res : status (NFS4ERR_OP_ILLEGAL)
2509  */
2510 /* ARGSUSED */
2511 static void
2512 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2513     struct svc_req *req, struct compound_state *cs)
2514 {
2515         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2516 
2517         resop->resop = OP_ILLEGAL;
2518         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2519 }
2520 
2521 /*
2522  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2523  *       res: status. If success - CURRENT_FH unchanged, return change_info
2524  */
2525 /* ARGSUSED */
2526 static void
2527 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2528     struct compound_state *cs)
2529 {
2530         LINK4args *args = &argop->nfs_argop4_u.oplink;
2531         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2532         int error;
2533         vnode_t *vp;
2534         vnode_t *dvp;
2535         struct vattr bdva, idva, adva;
2536         char *nm;
2537         uint_t  len;
2538         struct sockaddr *ca;
2539         char *name = NULL;
2540         nfsstat4 status;
2541 
2542         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2543             LINK4args *, args);
2544 
2545         /* SAVED_FH: source object */
2546         vp = cs->saved_vp;
2547         if (vp == NULL) {
2548                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2549                 goto out;
2550         }
2551 
2552         /* CURRENT_FH: target directory */
2553         dvp = cs->vp;
2554         if (dvp == NULL) {
2555                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2556                 goto out;
2557         }
2558 
2559         /*
2560          * If there is a non-shared filesystem mounted on this vnode,
2561          * do not allow to link any file in this directory.
2562          */
2563         if (vn_ismntpt(dvp)) {
2564                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2565                 goto out;
2566         }
2567 
2568         if (cs->access == CS_ACCESS_DENIED) {
2569                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2570                 goto out;
2571         }
2572 
2573         /* Check source object's type validity */
2574         if (vp->v_type == VDIR) {
2575                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2576                 goto out;
2577         }
2578 
2579         /* Check target directory's type */
2580         if (dvp->v_type != VDIR) {
2581                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2582                 goto out;
2583         }
2584 
2585         if (cs->saved_exi != cs->exi) {
2586                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2587                 goto out;
2588         }
2589 
2590         status = utf8_dir_verify(&args->newname);
2591         if (status != NFS4_OK) {
2592                 *cs->statusp = resp->status = status;
2593                 goto out;
2594         }
2595 
2596         nm = utf8_to_fn(&args->newname, &len, NULL);
2597         if (nm == NULL) {
2598                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2599                 goto out;
2600         }
2601 
2602         if (len > MAXNAMELEN) {
2603                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2604                 kmem_free(nm, len);
2605                 goto out;
2606         }
2607 
2608         if (rdonly4(req, cs)) {
2609                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2610                 kmem_free(nm, len);
2611                 goto out;
2612         }
2613 
2614         /* Get "before" change value */
2615         bdva.va_mask = AT_CTIME|AT_SEQ;
2616         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2617         if (error) {
2618                 *cs->statusp = resp->status = puterrno4(error);
2619                 kmem_free(nm, len);
2620                 goto out;
2621         }
2622 
2623         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2624         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2625             MAXPATHLEN  + 1);
2626 
2627         if (name == NULL) {
2628                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2629                 kmem_free(nm, len);
2630                 goto out;
2631         }
2632 
2633         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2634 
2635         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2636 
2637         if (nm != name)
2638                 kmem_free(name, MAXPATHLEN + 1);
2639         kmem_free(nm, len);
2640 
2641         /*
2642          * Get the initial "after" sequence number, if it fails, set to zero
2643          */
2644         idva.va_mask = AT_SEQ;
2645         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2646                 idva.va_seq = 0;
2647 
2648         /*
2649          * Force modified data and metadata out to stable storage.
2650          */
2651         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2652         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2653 
2654         if (error) {
2655                 *cs->statusp = resp->status = puterrno4(error);
2656                 goto out;
2657         }
2658 
2659         /*
2660          * Get "after" change value, if it fails, simply return the
2661          * before value.
2662          */
2663         adva.va_mask = AT_CTIME|AT_SEQ;
2664         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2665                 adva.va_ctime = bdva.va_ctime;
2666                 adva.va_seq = 0;
2667         }
2668 
2669         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2670 
2671         /*
2672          * The cinfo.atomic = TRUE only if we have
2673          * non-zero va_seq's, and it has incremented by exactly one
2674          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2675          */
2676         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2677             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2678                 resp->cinfo.atomic = TRUE;
2679         else
2680                 resp->cinfo.atomic = FALSE;
2681 
2682         *cs->statusp = resp->status = NFS4_OK;
2683 out:
2684         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2685             LINK4res *, resp);
2686 }
2687 
2688 /*
2689  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2690  */
2691 
2692 /* ARGSUSED */
2693 static nfsstat4
2694 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2695 {
2696         int error;
2697         int different_export = 0;
2698         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2699         struct exportinfo *exi = NULL, *pre_exi = NULL;
2700         nfsstat4 stat;
2701         fid_t fid;
2702         int attrdir, dotdot, walk;
2703         bool_t is_newvp = FALSE;
2704 
2705         if (cs->vp->v_flag & V_XATTRDIR) {
2706                 attrdir = 1;
2707                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2708         } else {
2709                 attrdir = 0;
2710                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2711         }
2712 
2713         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2714 
2715         /*
2716          * If dotdotting, then need to check whether it's
2717          * above the root of a filesystem, or above an
2718          * export point.
2719          */
2720         if (dotdot) {
2721 
2722                 /*
2723                  * If dotdotting at the root of a filesystem, then
2724                  * need to traverse back to the mounted-on filesystem
2725                  * and do the dotdot lookup there.
2726                  */
2727                 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2728 
2729                         /*
2730                          * If at the system root, then can
2731                          * go up no further.
2732                          */
2733                         if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2734                                 return (puterrno4(ENOENT));
2735 
2736                         /*
2737                          * Traverse back to the mounted-on filesystem
2738                          */
2739                         cs->vp = untraverse(cs->vp);
2740 
2741                         /*
2742                          * Set the different_export flag so we remember
2743                          * to pick up a new exportinfo entry for
2744                          * this new filesystem.
2745                          */
2746                         different_export = 1;
2747                 } else {
2748 
2749                         /*
2750                          * If dotdotting above an export point then set
2751                          * the different_export to get new export info.
2752                          */
2753                         different_export = nfs_exported(cs->exi, cs->vp);
2754                 }
2755         }
2756 
2757         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2758             NULL, NULL, NULL);
2759         if (error)
2760                 return (puterrno4(error));
2761 
2762         /*
2763          * If the vnode is in a pseudo filesystem, check whether it is visible.
2764          *
2765          * XXX if the vnode is a symlink and it is not visible in
2766          * a pseudo filesystem, return ENOENT (not following symlink).
2767          * V4 client can not mount such symlink. This is a regression
2768          * from V2/V3.
2769          *
2770          * In the same exported filesystem, if the security flavor used
2771          * is not an explicitly shared flavor, limit the view to the visible
2772          * list entries only. This is not a WRONGSEC case because it's already
2773          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2774          */
2775         if (!different_export &&
2776             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2777             cs->access & CS_ACCESS_LIMITED)) {
2778                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2779                         VN_RELE(vp);
2780                         return (puterrno4(ENOENT));
2781                 }
2782         }
2783 
2784         /*
2785          * If it's a mountpoint, then traverse it.
2786          */
2787         if (vn_ismntpt(vp)) {
2788                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2789                 pre_tvp = vp;           /* save pre-traversed vnode     */
2790 
2791                 /*
2792                  * hold pre_tvp to counteract rele by traverse.  We will
2793                  * need pre_tvp below if checkexport4 fails
2794                  */
2795                 VN_HOLD(pre_tvp);
2796                 if ((error = traverse(&vp)) != 0) {
2797                         VN_RELE(vp);
2798                         VN_RELE(pre_tvp);
2799                         return (puterrno4(error));
2800                 }
2801                 different_export = 1;
2802         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2803                 /*
2804                  * The vfsp comparison is to handle the case where
2805                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2806                  * and NFS is unaware of local fs transistions because
2807                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2808                  * the dir and the obj returned by lookup will have different
2809                  * vfs ptrs.
2810                  */
2811                 different_export = 1;
2812         }
2813 
2814         if (different_export) {
2815 
2816                 bzero(&fid, sizeof (fid));
2817                 fid.fid_len = MAXFIDSZ;
2818                 error = vop_fid_pseudo(vp, &fid);
2819                 if (error) {
2820                         VN_RELE(vp);
2821                         if (pre_tvp)
2822                                 VN_RELE(pre_tvp);
2823                         return (puterrno4(error));
2824                 }
2825 
2826                 if (dotdot)
2827                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2828                 else
2829                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2830 
2831                 if (exi == NULL) {
2832                         if (pre_tvp) {
2833                                 /*
2834                                  * If this vnode is a mounted-on vnode,
2835                                  * but the mounted-on file system is not
2836                                  * exported, send back the filehandle for
2837                                  * the mounted-on vnode, not the root of
2838                                  * the mounted-on file system.
2839                                  */
2840                                 VN_RELE(vp);
2841                                 vp = pre_tvp;
2842                                 exi = pre_exi;
2843                         } else {
2844                                 VN_RELE(vp);
2845                                 return (puterrno4(EACCES));
2846                         }
2847                 } else if (pre_tvp) {
2848                         /* we're done with pre_tvp now. release extra hold */
2849                         VN_RELE(pre_tvp);
2850                 }
2851 
2852                 cs->exi = exi;
2853 
2854                 /*
2855                  * Now we do a checkauth4. The reason is that
2856                  * this client/user may not have access to the new
2857                  * exported file system, and if they do,
2858                  * the client/user may be mapped to a different uid.
2859                  *
2860                  * We start with a new cr, because the checkauth4 done
2861                  * in the PUT*FH operation over wrote the cred's uid,
2862                  * gid, etc, and we want the real thing before calling
2863                  * checkauth4()
2864                  */
2865                 crfree(cs->cr);
2866                 cs->cr = crdup(cs->basecr);
2867 
2868                 oldvp = cs->vp;
2869                 cs->vp = vp;
2870                 is_newvp = TRUE;
2871 
2872                 stat = call_checkauth4(cs, req);
2873                 if (stat != NFS4_OK) {
2874                         VN_RELE(cs->vp);
2875                         cs->vp = oldvp;
2876                         return (stat);
2877                 }
2878         }
2879 
2880         /*
2881          * After various NFS checks, do a label check on the path
2882          * component. The label on this path should either be the
2883          * global zone's label or a zone's label. We are only
2884          * interested in the zone's label because exported files
2885          * in global zone is accessible (though read-only) to
2886          * clients. The exportability/visibility check is already
2887          * done before reaching this code.
2888          */
2889         if (is_system_labeled()) {
2890                 bslabel_t *clabel;
2891 
2892                 ASSERT(req->rq_label != NULL);
2893                 clabel = req->rq_label;
2894                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2895                     "got client label from request(1)", struct svc_req *, req);
2896 
2897                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2898                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2899                             cs->exi)) {
2900                                 error = EACCES;
2901                                 goto err_out;
2902                         }
2903                 } else {
2904                         /*
2905                          * We grant access to admin_low label clients
2906                          * only if the client is trusted, i.e. also
2907                          * running Solaris Trusted Extension.
2908                          */
2909                         struct sockaddr *ca;
2910                         int             addr_type;
2911                         void            *ipaddr;
2912                         tsol_tpc_t      *tp;
2913 
2914                         ca = (struct sockaddr *)svc_getrpccaller(
2915                             req->rq_xprt)->buf;
2916                         if (ca->sa_family == AF_INET) {
2917                                 addr_type = IPV4_VERSION;
2918                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2919                         } else if (ca->sa_family == AF_INET6) {
2920                                 addr_type = IPV6_VERSION;
2921                                 ipaddr = &((struct sockaddr_in6 *)
2922                                     ca)->sin6_addr;
2923                         }
2924                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2925                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2926                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2927                             SUN_CIPSO) {
2928                                 if (tp != NULL)
2929                                         TPC_RELE(tp);
2930                                 error = EACCES;
2931                                 goto err_out;
2932                         }
2933                         TPC_RELE(tp);
2934                 }
2935         }
2936 
2937         error = makefh4(&cs->fh, vp, cs->exi);
2938 
2939 err_out:
2940         if (error) {
2941                 if (is_newvp) {
2942                         VN_RELE(cs->vp);
2943                         cs->vp = oldvp;
2944                 } else
2945                         VN_RELE(vp);
2946                 return (puterrno4(error));
2947         }
2948 
2949         if (!is_newvp) {
2950                 if (cs->vp)
2951                         VN_RELE(cs->vp);
2952                 cs->vp = vp;
2953         } else if (oldvp)
2954                 VN_RELE(oldvp);
2955 
2956         /*
2957          * if did lookup on attrdir and didn't lookup .., set named
2958          * attr fh flag
2959          */
2960         if (attrdir && ! dotdot)
2961                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2962 
2963         /* Assume false for now, open proc will set this */
2964         cs->mandlock = FALSE;
2965 
2966         return (NFS4_OK);
2967 }
2968 
2969 /* ARGSUSED */
2970 static void
2971 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2972     struct compound_state *cs)
2973 {
2974         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2975         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2976         char *nm;
2977         uint_t len;
2978         struct sockaddr *ca;
2979         char *name = NULL;
2980         nfsstat4 status;
2981 
2982         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2983             LOOKUP4args *, args);
2984 
2985         if (cs->vp == NULL) {
2986                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2987                 goto out;
2988         }
2989 
2990         if (cs->vp->v_type == VLNK) {
2991                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2992                 goto out;
2993         }
2994 
2995         if (cs->vp->v_type != VDIR) {
2996                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2997                 goto out;
2998         }
2999 
3000         status = utf8_dir_verify(&args->objname);
3001         if (status != NFS4_OK) {
3002                 *cs->statusp = resp->status = status;
3003                 goto out;
3004         }
3005 
3006         nm = utf8_to_str(&args->objname, &len, NULL);
3007         if (nm == NULL) {
3008                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3009                 goto out;
3010         }
3011 
3012         if (len > MAXNAMELEN) {
3013                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3014                 kmem_free(nm, len);
3015                 goto out;
3016         }
3017 
3018         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3019         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3020             MAXPATHLEN  + 1);
3021 
3022         if (name == NULL) {
3023                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3024                 kmem_free(nm, len);
3025                 goto out;
3026         }
3027 
3028         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3029 
3030         if (name != nm)
3031                 kmem_free(name, MAXPATHLEN + 1);
3032         kmem_free(nm, len);
3033 
3034 out:
3035         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3036             LOOKUP4res *, resp);
3037 }
3038 
3039 /* ARGSUSED */
3040 static void
3041 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3042     struct compound_state *cs)
3043 {
3044         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3045 
3046         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3047 
3048         if (cs->vp == NULL) {
3049                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3050                 goto out;
3051         }
3052 
3053         if (cs->vp->v_type != VDIR) {
3054                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3055                 goto out;
3056         }
3057 
3058         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3059 
3060         /*
3061          * From NFSV4 Specification, LOOKUPP should not check for
3062          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3063          */
3064         if (resp->status == NFS4ERR_WRONGSEC) {
3065                 *cs->statusp = resp->status = NFS4_OK;
3066         }
3067 
3068 out:
3069         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3070             LOOKUPP4res *, resp);
3071 }
3072 
3073 
3074 /*ARGSUSED2*/
3075 static void
3076 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3077     struct compound_state *cs)
3078 {
3079         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3080         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3081         vnode_t         *avp = NULL;
3082         int             lookup_flags = LOOKUP_XATTR, error;
3083         int             exp_ro = 0;
3084 
3085         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3086             OPENATTR4args *, args);
3087 
3088         if (cs->vp == NULL) {
3089                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3090                 goto out;
3091         }
3092 
3093         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3094             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3095                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3096                 goto out;
3097         }
3098 
3099         /*
3100          * If file system supports passing ACE mask to VOP_ACCESS then
3101          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3102          */
3103 
3104         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3105                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3106                     V_ACE_MASK, cs->cr, NULL);
3107         else
3108                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3109                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3110                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3111 
3112         if (error) {
3113                 *cs->statusp = resp->status = puterrno4(EACCES);
3114                 goto out;
3115         }
3116 
3117         /*
3118          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3119          * the file system is exported read-only -- regardless of
3120          * createdir flag.  Otherwise the attrdir would be created
3121          * (assuming server fs isn't mounted readonly locally).  If
3122          * VOP_LOOKUP returns ENOENT in this case, the error will
3123          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3124          * because specfs has no VOP_LOOKUP op, so the macro would
3125          * return ENOSYS.  EINVAL is returned by all (current)
3126          * Solaris file system implementations when any of their
3127          * restrictions are violated (xattr(dir) can't have xattrdir).
3128          * Returning NOTSUPP is more appropriate in this case
3129          * because the object will never be able to have an attrdir.
3130          */
3131         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3132                 lookup_flags |= CREATE_XATTR_DIR;
3133 
3134         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3135             NULL, NULL, NULL);
3136 
3137         if (error) {
3138                 if (error == ENOENT && args->createdir && exp_ro)
3139                         *cs->statusp = resp->status = puterrno4(EROFS);
3140                 else if (error == EINVAL || error == ENOSYS)
3141                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3142                 else
3143                         *cs->statusp = resp->status = puterrno4(error);
3144                 goto out;
3145         }
3146 
3147         ASSERT(avp->v_flag & V_XATTRDIR);
3148 
3149         error = makefh4(&cs->fh, avp, cs->exi);
3150 
3151         if (error) {
3152                 VN_RELE(avp);
3153                 *cs->statusp = resp->status = puterrno4(error);
3154                 goto out;
3155         }
3156 
3157         VN_RELE(cs->vp);
3158         cs->vp = avp;
3159 
3160         /*
3161          * There is no requirement for an attrdir fh flag
3162          * because the attrdir has a vnode flag to distinguish
3163          * it from regular (non-xattr) directories.  The
3164          * FH4_ATTRDIR flag is set for future sanity checks.
3165          */
3166         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3167         *cs->statusp = resp->status = NFS4_OK;
3168 
3169 out:
3170         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3171             OPENATTR4res *, resp);
3172 }
3173 
3174 static int
3175 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3176     caller_context_t *ct)
3177 {
3178         int error;
3179         int i;
3180         clock_t delaytime;
3181 
3182         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3183 
3184         /*
3185          * Don't block on mandatory locks. If this routine returns
3186          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3187          */
3188         uio->uio_fmode = FNONBLOCK;
3189 
3190         for (i = 0; i < rfs4_maxlock_tries; i++) {
3191 
3192 
3193                 if (direction == FREAD) {
3194                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3195                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3196                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3197                 } else {
3198                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3199                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3200                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3201                 }
3202 
3203                 if (error != EAGAIN)
3204                         break;
3205 
3206                 if (i < rfs4_maxlock_tries - 1) {
3207                         delay(delaytime);
3208                         delaytime *= 2;
3209                 }
3210         }
3211 
3212         return (error);
3213 }
3214 
3215 /* ARGSUSED */
3216 static void
3217 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3218     struct compound_state *cs)
3219 {
3220         READ4args *args = &argop->nfs_argop4_u.opread;
3221         READ4res *resp = &resop->nfs_resop4_u.opread;
3222         int error;
3223         int verror;
3224         vnode_t *vp;
3225         struct vattr va;
3226         struct iovec iov, *iovp = NULL;
3227         int iovcnt;
3228         struct uio uio;
3229         u_offset_t offset;
3230         bool_t *deleg = &cs->deleg;
3231         nfsstat4 stat;
3232         int in_crit = 0;
3233         mblk_t *mp = NULL;
3234         int alloc_err = 0;
3235         int rdma_used = 0;
3236         int loaned_buffers;
3237         caller_context_t ct;
3238         struct uio *uiop;
3239 
3240         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3241             READ4args, args);
3242 
3243         vp = cs->vp;
3244         if (vp == NULL) {
3245                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3246                 goto out;
3247         }
3248         if (cs->access == CS_ACCESS_DENIED) {
3249                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3250                 goto out;
3251         }
3252 
3253         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3254             deleg, TRUE, &ct)) != NFS4_OK) {
3255                 *cs->statusp = resp->status = stat;
3256                 goto out;
3257         }
3258 
3259         /*
3260          * Enter the critical region before calling VOP_RWLOCK
3261          * to avoid a deadlock with write requests.
3262          */
3263         if (nbl_need_check(vp)) {
3264                 nbl_start_crit(vp, RW_READER);
3265                 in_crit = 1;
3266                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3267                     &ct)) {
3268                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3269                         goto out;
3270                 }
3271         }
3272 
3273         if (args->wlist) {
3274                 if (args->count > clist_len(args->wlist)) {
3275                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3276                         goto out;
3277                 }
3278                 rdma_used = 1;
3279         }
3280 
3281         /* use loaned buffers for TCP */
3282         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3283 
3284         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3285         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3286 
3287         /*
3288          * If we can't get the attributes, then we can't do the
3289          * right access checking.  So, we'll fail the request.
3290          */
3291         if (verror) {
3292                 *cs->statusp = resp->status = puterrno4(verror);
3293                 goto out;
3294         }
3295 
3296         if (vp->v_type != VREG) {
3297                 *cs->statusp = resp->status =
3298                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3299                 goto out;
3300         }
3301 
3302         if (crgetuid(cs->cr) != va.va_uid &&
3303             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3304             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3305                 *cs->statusp = resp->status = puterrno4(error);
3306                 goto out;
3307         }
3308 
3309         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3310                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3311                 goto out;
3312         }
3313 
3314         offset = args->offset;
3315         if (offset >= va.va_size) {
3316                 *cs->statusp = resp->status = NFS4_OK;
3317                 resp->eof = TRUE;
3318                 resp->data_len = 0;
3319                 resp->data_val = NULL;
3320                 resp->mblk = NULL;
3321                 /* RDMA */
3322                 resp->wlist = args->wlist;
3323                 resp->wlist_len = resp->data_len;
3324                 *cs->statusp = resp->status = NFS4_OK;
3325                 if (resp->wlist)
3326                         clist_zero_len(resp->wlist);
3327                 goto out;
3328         }
3329 
3330         if (args->count == 0) {
3331                 *cs->statusp = resp->status = NFS4_OK;
3332                 resp->eof = FALSE;
3333                 resp->data_len = 0;
3334                 resp->data_val = NULL;
3335                 resp->mblk = NULL;
3336                 /* RDMA */
3337                 resp->wlist = args->wlist;
3338                 resp->wlist_len = resp->data_len;
3339                 if (resp->wlist)
3340                         clist_zero_len(resp->wlist);
3341                 goto out;
3342         }
3343 
3344         /*
3345          * Do not allocate memory more than maximum allowed
3346          * transfer size
3347          */
3348         if (args->count > rfs4_tsize(req))
3349                 args->count = rfs4_tsize(req);
3350 
3351         if (loaned_buffers) {
3352                 uiop = (uio_t *)rfs_setup_xuio(vp);
3353                 ASSERT(uiop != NULL);
3354                 uiop->uio_segflg = UIO_SYSSPACE;
3355                 uiop->uio_loffset = args->offset;
3356                 uiop->uio_resid = args->count;
3357 
3358                 /* Jump to do the read if successful */
3359                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3360                         /*
3361                          * Need to hold the vnode until after VOP_RETZCBUF()
3362                          * is called.
3363                          */
3364                         VN_HOLD(vp);
3365                         goto doio_read;
3366                 }
3367 
3368                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3369                     uiop->uio_loffset, int, uiop->uio_resid);
3370 
3371                 uiop->uio_extflg = 0;
3372 
3373                 /* failure to setup for zero copy */
3374                 rfs_free_xuio((void *)uiop);
3375                 loaned_buffers = 0;
3376         }
3377 
3378         /*
3379          * If returning data via RDMA Write, then grab the chunk list. If we
3380          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3381          */
3382         if (rdma_used) {
3383                 mp = NULL;
3384                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3385                 uio.uio_iov = &iov;
3386                 uio.uio_iovcnt = 1;
3387         } else {
3388                 /*
3389                  * mp will contain the data to be sent out in the read reply.
3390                  * It will be freed after the reply has been sent.
3391                  */
3392                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3393                 ASSERT(mp != NULL);
3394                 ASSERT(alloc_err == 0);
3395                 uio.uio_iov = iovp;
3396                 uio.uio_iovcnt = iovcnt;
3397         }
3398 
3399         uio.uio_segflg = UIO_SYSSPACE;
3400         uio.uio_extflg = UIO_COPY_CACHED;
3401         uio.uio_loffset = args->offset;
3402         uio.uio_resid = args->count;
3403         uiop = &uio;
3404 
3405 doio_read:
3406         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3407 
3408         va.va_mask = AT_SIZE;
3409         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3410 
3411         if (error) {
3412                 if (mp)
3413                         freemsg(mp);
3414                 *cs->statusp = resp->status = puterrno4(error);
3415                 goto out;
3416         }
3417 
3418         /* make mblk using zc buffers */
3419         if (loaned_buffers) {
3420                 mp = uio_to_mblk(uiop);
3421                 ASSERT(mp != NULL);
3422         }
3423 
3424         *cs->statusp = resp->status = NFS4_OK;
3425 
3426         ASSERT(uiop->uio_resid >= 0);
3427         resp->data_len = args->count - uiop->uio_resid;
3428         if (mp) {
3429                 resp->data_val = (char *)mp->b_datap->db_base;
3430                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3431         } else {
3432                 resp->data_val = (caddr_t)iov.iov_base;
3433         }
3434 
3435         resp->mblk = mp;
3436 
3437         if (!verror && offset + resp->data_len == va.va_size)
3438                 resp->eof = TRUE;
3439         else
3440                 resp->eof = FALSE;
3441 
3442         if (rdma_used) {
3443                 if (!rdma_setup_read_data4(args, resp)) {
3444                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3445                 }
3446         } else {
3447                 resp->wlist = NULL;
3448         }
3449 
3450 out:
3451         if (in_crit)
3452                 nbl_end_crit(vp);
3453 
3454         if (iovp != NULL)
3455                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3456 
3457         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3458             READ4res *, resp);
3459 }
3460 
3461 static void
3462 rfs4_op_read_free(nfs_resop4 *resop)
3463 {
3464         READ4res        *resp = &resop->nfs_resop4_u.opread;
3465 
3466         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3467                 freemsg(resp->mblk);
3468                 resp->mblk = NULL;
3469                 resp->data_val = NULL;
3470                 resp->data_len = 0;
3471         }
3472 }
3473 
3474 static void
3475 rfs4_op_readdir_free(nfs_resop4 * resop)
3476 {
3477         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3478 
3479         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3480                 freeb(resp->mblk);
3481                 resp->mblk = NULL;
3482                 resp->data_len = 0;
3483         }
3484 }
3485 
3486 
3487 /* ARGSUSED */
3488 static void
3489 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3490     struct compound_state *cs)
3491 {
3492         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3493         int             error;
3494         vnode_t         *vp;
3495         struct exportinfo *exi, *sav_exi;
3496         nfs_fh4_fmt_t   *fh_fmtp;
3497         nfs_export_t *ne = nfs_get_export();
3498 
3499         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3500 
3501         if (cs->vp) {
3502                 VN_RELE(cs->vp);
3503                 cs->vp = NULL;
3504         }
3505 
3506         if (cs->cr)
3507                 crfree(cs->cr);
3508 
3509         cs->cr = crdup(cs->basecr);
3510 
3511         vp = ne->exi_public->exi_vp;
3512         if (vp == NULL) {
3513                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3514                 goto out;
3515         }
3516 
3517         error = makefh4(&cs->fh, vp, ne->exi_public);
3518         if (error != 0) {
3519                 *cs->statusp = resp->status = puterrno4(error);
3520                 goto out;
3521         }
3522         sav_exi = cs->exi;
3523         if (ne->exi_public == ne->exi_root) {
3524                 /*
3525                  * No filesystem is actually shared public, so we default
3526                  * to exi_root. In this case, we must check whether root
3527                  * is exported.
3528                  */
3529                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3530 
3531                 /*
3532                  * if root filesystem is exported, the exportinfo struct that we
3533                  * should use is what checkexport4 returns, because root_exi is
3534                  * actually a mostly empty struct.
3535                  */
3536                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3537                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3538                 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3539         } else {
3540                 /*
3541                  * it's a properly shared filesystem
3542                  */
3543                 cs->exi = ne->exi_public;
3544         }
3545 
3546         if (is_system_labeled()) {
3547                 bslabel_t *clabel;
3548 
3549                 ASSERT(req->rq_label != NULL);
3550                 clabel = req->rq_label;
3551                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3552                     "got client label from request(1)",
3553                     struct svc_req *, req);
3554                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3555                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3556                             cs->exi)) {
3557                                 *cs->statusp = resp->status =
3558                                     NFS4ERR_SERVERFAULT;
3559                                 goto out;
3560                         }
3561                 }
3562         }
3563 
3564         VN_HOLD(vp);
3565         cs->vp = vp;
3566 
3567         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3568                 VN_RELE(cs->vp);
3569                 cs->vp = NULL;
3570                 cs->exi = sav_exi;
3571                 goto out;
3572         }
3573 
3574         *cs->statusp = resp->status = NFS4_OK;
3575 out:
3576         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3577             PUTPUBFH4res *, resp);
3578 }
3579 
3580 /*
3581  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3582  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3583  * or joe have restrictive search permissions, then we shouldn't let
3584  * the client get a file handle. This is easy to enforce. However, we
3585  * don't know what security flavor should be used until we resolve the
3586  * path name. Another complication is uid mapping. If root is
3587  * the user, then it will be mapped to the anonymous user by default,
3588  * but we won't know that till we've resolved the path name. And we won't
3589  * know what the anonymous user is.
3590  * Luckily, SECINFO is specified to take a full filename.
3591  * So what we will have to in rfs4_op_lookup is check that flavor of
3592  * the target object matches that of the request, and if root was the
3593  * caller, check for the root= and anon= options, and if necessary,
3594  * repeat the lookup using the right cred_t. But that's not done yet.
3595  */
3596 /* ARGSUSED */
3597 static void
3598 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3599     struct compound_state *cs)
3600 {
3601         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3602         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3603         nfs_fh4_fmt_t *fh_fmtp;
3604 
3605         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3606             PUTFH4args *, args);
3607 
3608         if (cs->vp) {
3609                 VN_RELE(cs->vp);
3610                 cs->vp = NULL;
3611         }
3612 
3613         if (cs->cr) {
3614                 crfree(cs->cr);
3615                 cs->cr = NULL;
3616         }
3617 
3618 
3619         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3620                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3621                 goto out;
3622         }
3623 
3624         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3625         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3626             NULL);
3627 
3628         if (cs->exi == NULL) {
3629                 *cs->statusp = resp->status = NFS4ERR_STALE;
3630                 goto out;
3631         }
3632 
3633         cs->cr = crdup(cs->basecr);
3634 
3635         ASSERT(cs->cr != NULL);
3636 
3637         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3638                 *cs->statusp = resp->status;
3639                 goto out;
3640         }
3641 
3642         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3643                 VN_RELE(cs->vp);
3644                 cs->vp = NULL;
3645                 goto out;
3646         }
3647 
3648         nfs_fh4_copy(&args->object, &cs->fh);
3649         *cs->statusp = resp->status = NFS4_OK;
3650         cs->deleg = FALSE;
3651 
3652 out:
3653         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3654             PUTFH4res *, resp);
3655 }
3656 
3657 /* ARGSUSED */
3658 static void
3659 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3660     struct compound_state *cs)
3661 {
3662         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3663         int error;
3664         fid_t fid;
3665         struct exportinfo *exi, *sav_exi;
3666 
3667         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3668 
3669         if (cs->vp) {
3670                 VN_RELE(cs->vp);
3671                 cs->vp = NULL;
3672         }
3673 
3674         if (cs->cr)
3675                 crfree(cs->cr);
3676 
3677         cs->cr = crdup(cs->basecr);
3678 
3679         /*
3680          * Using rootdir, the system root vnode,
3681          * get its fid.
3682          */
3683         bzero(&fid, sizeof (fid));
3684         fid.fid_len = MAXFIDSZ;
3685         error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3686         if (error != 0) {
3687                 *cs->statusp = resp->status = puterrno4(error);
3688                 goto out;
3689         }
3690 
3691         /*
3692          * Then use the root fsid & fid it to find out if it's exported
3693          *
3694          * If the server root isn't exported directly, then
3695          * it should at least be a pseudo export based on
3696          * one or more exports further down in the server's
3697          * file tree.
3698          */
3699         exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3700         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3701                 NFS4_DEBUG(rfs4_debug,
3702                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3703                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3704                 goto out;
3705         }
3706 
3707         /*
3708          * Now make a filehandle based on the root
3709          * export and root vnode.
3710          */
3711         error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3712         if (error != 0) {
3713                 *cs->statusp = resp->status = puterrno4(error);
3714                 goto out;
3715         }
3716 
3717         sav_exi = cs->exi;
3718         cs->exi = exi;
3719 
3720         VN_HOLD(ZONE_ROOTVP());
3721         cs->vp = ZONE_ROOTVP();
3722 
3723         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3724                 VN_RELE(cs->vp);
3725                 cs->vp = NULL;
3726                 cs->exi = sav_exi;
3727                 goto out;
3728         }
3729 
3730         *cs->statusp = resp->status = NFS4_OK;
3731         cs->deleg = FALSE;
3732 out:
3733         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3734             PUTROOTFH4res *, resp);
3735 }
3736 
3737 /*
3738  * readlink: args: CURRENT_FH.
3739  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3740  */
3741 
3742 /* ARGSUSED */
3743 static void
3744 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3745     struct compound_state *cs)
3746 {
3747         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3748         int error;
3749         vnode_t *vp;
3750         struct iovec iov;
3751         struct vattr va;
3752         struct uio uio;
3753         char *data;
3754         struct sockaddr *ca;
3755         char *name = NULL;
3756         int is_referral;
3757 
3758         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3759 
3760         /* CURRENT_FH: directory */
3761         vp = cs->vp;
3762         if (vp == NULL) {
3763                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3764                 goto out;
3765         }
3766 
3767         if (cs->access == CS_ACCESS_DENIED) {
3768                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3769                 goto out;
3770         }
3771 
3772         /* Is it a referral? */
3773         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3774 
3775                 is_referral = 1;
3776 
3777         } else {
3778 
3779                 is_referral = 0;
3780 
3781                 if (vp->v_type == VDIR) {
3782                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3783                         goto out;
3784                 }
3785 
3786                 if (vp->v_type != VLNK) {
3787                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3788                         goto out;
3789                 }
3790 
3791         }
3792 
3793         va.va_mask = AT_MODE;
3794         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3795         if (error) {
3796                 *cs->statusp = resp->status = puterrno4(error);
3797                 goto out;
3798         }
3799 
3800         if (MANDLOCK(vp, va.va_mode)) {
3801                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3802                 goto out;
3803         }
3804 
3805         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3806 
3807         if (is_referral) {
3808                 char *s;
3809                 size_t strsz;
3810 
3811                 /* Get an artificial symlink based on a referral */
3812                 s = build_symlink(vp, cs->cr, &strsz);
3813                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3814                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3815                     vnode_t *, vp, char *, s);
3816                 if (s == NULL)
3817                         error = EINVAL;
3818                 else {
3819                         error = 0;
3820                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3821                         kmem_free(s, strsz);
3822                 }
3823 
3824         } else {
3825 
3826                 iov.iov_base = data;
3827                 iov.iov_len = MAXPATHLEN;
3828                 uio.uio_iov = &iov;
3829                 uio.uio_iovcnt = 1;
3830                 uio.uio_segflg = UIO_SYSSPACE;
3831                 uio.uio_extflg = UIO_COPY_CACHED;
3832                 uio.uio_loffset = 0;
3833                 uio.uio_resid = MAXPATHLEN;
3834 
3835                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3836 
3837                 if (!error)
3838                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3839         }
3840 
3841         if (error) {
3842                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3843                 *cs->statusp = resp->status = puterrno4(error);
3844                 goto out;
3845         }
3846 
3847         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3848         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3849             MAXPATHLEN  + 1);
3850 
3851         if (name == NULL) {
3852                 /*
3853                  * Even though the conversion failed, we return
3854                  * something. We just don't translate it.
3855                  */
3856                 name = data;
3857         }
3858 
3859         /*
3860          * treat link name as data
3861          */
3862         (void) str_to_utf8(name, (utf8string *)&resp->link);
3863 
3864         if (name != data)
3865                 kmem_free(name, MAXPATHLEN + 1);
3866         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3867         *cs->statusp = resp->status = NFS4_OK;
3868 
3869 out:
3870         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3871             READLINK4res *, resp);
3872 }
3873 
3874 static void
3875 rfs4_op_readlink_free(nfs_resop4 *resop)
3876 {
3877         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3878         utf8string *symlink = (utf8string *)&resp->link;
3879 
3880         if (symlink->utf8string_val) {
3881                 UTF8STRING_FREE(*symlink)
3882         }
3883 }
3884 
3885 /*
3886  * release_lockowner:
3887  *      Release any state associated with the supplied
3888  *      lockowner. Note if any lo_state is holding locks we will not
3889  *      rele that lo_state and thus the lockowner will not be destroyed.
3890  *      A client using lock after the lock owner stateid has been released
3891  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3892  *      to reissue the lock with new_lock_owner set to TRUE.
3893  *      args: lock_owner
3894  *      res:  status
3895  */
3896 /* ARGSUSED */
3897 static void
3898 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3899     struct svc_req *req, struct compound_state *cs)
3900 {
3901         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3902         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3903         rfs4_lockowner_t *lo;
3904         rfs4_openowner_t *oo;
3905         rfs4_state_t *sp;
3906         rfs4_lo_state_t *lsp;
3907         rfs4_client_t *cp;
3908         bool_t create = FALSE;
3909         locklist_t *llist;
3910         sysid_t sysid;
3911 
3912         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3913             cs, RELEASE_LOCKOWNER4args *, ap);
3914 
3915         /* Make sure there is a clientid around for this request */
3916         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3917 
3918         if (cp == NULL) {
3919                 *cs->statusp = resp->status =
3920                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3921                 goto out;
3922         }
3923         rfs4_client_rele(cp);
3924 
3925         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3926         if (lo == NULL) {
3927                 *cs->statusp = resp->status = NFS4_OK;
3928                 goto out;
3929         }
3930         ASSERT(lo->rl_client != NULL);
3931 
3932         /*
3933          * Check for EXPIRED client. If so will reap state with in a lease
3934          * period or on next set_clientid_confirm step
3935          */
3936         if (rfs4_lease_expired(lo->rl_client)) {
3937                 rfs4_lockowner_rele(lo);
3938                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3939                 goto out;
3940         }
3941 
3942         /*
3943          * If no sysid has been assigned, then no locks exist; just return.
3944          */
3945         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3946         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3947                 rfs4_lockowner_rele(lo);
3948                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3949                 goto out;
3950         }
3951 
3952         sysid = lo->rl_client->rc_sysidt;
3953         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3954 
3955         /*
3956          * Mark the lockowner invalid.
3957          */
3958         rfs4_dbe_hide(lo->rl_dbe);
3959 
3960         /*
3961          * sysid-pid pair should now not be used since the lockowner is
3962          * invalid. If the client were to instantiate the lockowner again
3963          * it would be assigned a new pid. Thus we can get the list of
3964          * current locks.
3965          */
3966 
3967         llist = flk_get_active_locks(sysid, lo->rl_pid);
3968         /* If we are still holding locks fail */
3969         if (llist != NULL) {
3970 
3971                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3972 
3973                 flk_free_locklist(llist);
3974                 /*
3975                  * We need to unhide the lockowner so the client can
3976                  * try it again. The bad thing here is if the client
3977                  * has a logic error that took it here in the first place
3978                  * they probably have lost accounting of the locks that it
3979                  * is holding. So we may have dangling state until the
3980                  * open owner state is reaped via close. One scenario
3981                  * that could possibly occur is that the client has
3982                  * sent the unlock request(s) in separate threads
3983                  * and has not waited for the replies before sending the
3984                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3985                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3986                  * reissuing the request.
3987                  */
3988                 rfs4_dbe_unhide(lo->rl_dbe);
3989                 rfs4_lockowner_rele(lo);
3990                 goto out;
3991         }
3992 
3993         /*
3994          * For the corresponding client we need to check each open
3995          * owner for any opens that have lockowner state associated
3996          * with this lockowner.
3997          */
3998 
3999         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4000         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4001             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4002 
4003                 rfs4_dbe_lock(oo->ro_dbe);
4004                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4005                     sp = list_next(&oo->ro_statelist, sp)) {
4006 
4007                         rfs4_dbe_lock(sp->rs_dbe);
4008                         for (lsp = list_head(&sp->rs_lostatelist);
4009                             lsp != NULL;
4010                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4011                                 if (lsp->rls_locker == lo) {
4012                                         rfs4_dbe_lock(lsp->rls_dbe);
4013                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4014                                         rfs4_dbe_unlock(lsp->rls_dbe);
4015                                 }
4016                         }
4017                         rfs4_dbe_unlock(sp->rs_dbe);
4018                 }
4019                 rfs4_dbe_unlock(oo->ro_dbe);
4020         }
4021         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4022 
4023         rfs4_lockowner_rele(lo);
4024 
4025         *cs->statusp = resp->status = NFS4_OK;
4026 
4027 out:
4028         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4029             cs, RELEASE_LOCKOWNER4res *, resp);
4030 }
4031 
4032 /*
4033  * short utility function to lookup a file and recall the delegation
4034  */
4035 static rfs4_file_t *
4036 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4037     int *lkup_error, cred_t *cr)
4038 {
4039         vnode_t *vp;
4040         rfs4_file_t *fp = NULL;
4041         bool_t fcreate = FALSE;
4042         int error;
4043 
4044         if (vpp)
4045                 *vpp = NULL;
4046 
4047         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4048             NULL)) == 0) {
4049                 if (vp->v_type == VREG)
4050                         fp = rfs4_findfile(vp, NULL, &fcreate);
4051                 if (vpp)
4052                         *vpp = vp;
4053                 else
4054                         VN_RELE(vp);
4055         }
4056 
4057         if (lkup_error)
4058                 *lkup_error = error;
4059 
4060         return (fp);
4061 }
4062 
4063 /*
4064  * remove: args: CURRENT_FH: directory; name.
4065  *      res: status. If success - CURRENT_FH unchanged, return change_info
4066  *              for directory.
4067  */
4068 /* ARGSUSED */
4069 static void
4070 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4071     struct compound_state *cs)
4072 {
4073         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4074         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4075         int error;
4076         vnode_t *dvp, *vp;
4077         struct vattr bdva, idva, adva;
4078         char *nm;
4079         uint_t len;
4080         rfs4_file_t *fp;
4081         int in_crit = 0;
4082         bslabel_t *clabel;
4083         struct sockaddr *ca;
4084         char *name = NULL;
4085         nfsstat4 status;
4086 
4087         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4088             REMOVE4args *, args);
4089 
4090         /* CURRENT_FH: directory */
4091         dvp = cs->vp;
4092         if (dvp == NULL) {
4093                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4094                 goto out;
4095         }
4096 
4097         if (cs->access == CS_ACCESS_DENIED) {
4098                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4099                 goto out;
4100         }
4101 
4102         /*
4103          * If there is an unshared filesystem mounted on this vnode,
4104          * Do not allow to remove anything in this directory.
4105          */
4106         if (vn_ismntpt(dvp)) {
4107                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4108                 goto out;
4109         }
4110 
4111         if (dvp->v_type != VDIR) {
4112                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4113                 goto out;
4114         }
4115 
4116         status = utf8_dir_verify(&args->target);
4117         if (status != NFS4_OK) {
4118                 *cs->statusp = resp->status = status;
4119                 goto out;
4120         }
4121 
4122         /*
4123          * Lookup the file so that we can check if it's a directory
4124          */
4125         nm = utf8_to_fn(&args->target, &len, NULL);
4126         if (nm == NULL) {
4127                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4128                 goto out;
4129         }
4130 
4131         if (len > MAXNAMELEN) {
4132                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4133                 kmem_free(nm, len);
4134                 goto out;
4135         }
4136 
4137         if (rdonly4(req, cs)) {
4138                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4139                 kmem_free(nm, len);
4140                 goto out;
4141         }
4142 
4143         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4144         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4145             MAXPATHLEN  + 1);
4146 
4147         if (name == NULL) {
4148                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4149                 kmem_free(nm, len);
4150                 goto out;
4151         }
4152 
4153         /*
4154          * Lookup the file to determine type and while we are see if
4155          * there is a file struct around and check for delegation.
4156          * We don't need to acquire va_seq before this lookup, if
4157          * it causes an update, cinfo.before will not match, which will
4158          * trigger a cache flush even if atomic is TRUE.
4159          */
4160         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4161                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4162                     NULL)) {
4163                         VN_RELE(vp);
4164                         rfs4_file_rele(fp);
4165                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4166                         if (nm != name)
4167                                 kmem_free(name, MAXPATHLEN + 1);
4168                         kmem_free(nm, len);
4169                         goto out;
4170                 }
4171         }
4172 
4173         /* Didn't find anything to remove */
4174         if (vp == NULL) {
4175                 *cs->statusp = resp->status = error;
4176                 if (nm != name)
4177                         kmem_free(name, MAXPATHLEN + 1);
4178                 kmem_free(nm, len);
4179                 goto out;
4180         }
4181 
4182         if (nbl_need_check(vp)) {
4183                 nbl_start_crit(vp, RW_READER);
4184                 in_crit = 1;
4185                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4186                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4187                         if (nm != name)
4188                                 kmem_free(name, MAXPATHLEN + 1);
4189                         kmem_free(nm, len);
4190                         nbl_end_crit(vp);
4191                         VN_RELE(vp);
4192                         if (fp) {
4193                                 rfs4_clear_dont_grant(fp);
4194                                 rfs4_file_rele(fp);
4195                         }
4196                         goto out;
4197                 }
4198         }
4199 
4200         /* check label before allowing removal */
4201         if (is_system_labeled()) {
4202                 ASSERT(req->rq_label != NULL);
4203                 clabel = req->rq_label;
4204                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4205                     "got client label from request(1)",
4206                     struct svc_req *, req);
4207                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4208                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4209                             cs->exi)) {
4210                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4211                                 if (name != nm)
4212                                         kmem_free(name, MAXPATHLEN + 1);
4213                                 kmem_free(nm, len);
4214                                 if (in_crit)
4215                                         nbl_end_crit(vp);
4216                                 VN_RELE(vp);
4217                                 if (fp) {
4218                                         rfs4_clear_dont_grant(fp);
4219                                         rfs4_file_rele(fp);
4220                                 }
4221                                 goto out;
4222                         }
4223                 }
4224         }
4225 
4226         /* Get dir "before" change value */
4227         bdva.va_mask = AT_CTIME|AT_SEQ;
4228         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4229         if (error) {
4230                 *cs->statusp = resp->status = puterrno4(error);
4231                 if (nm != name)
4232                         kmem_free(name, MAXPATHLEN + 1);
4233                 kmem_free(nm, len);
4234                 if (in_crit)
4235                         nbl_end_crit(vp);
4236                 VN_RELE(vp);
4237                 if (fp) {
4238                         rfs4_clear_dont_grant(fp);
4239                         rfs4_file_rele(fp);
4240                 }
4241                 goto out;
4242         }
4243         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4244 
4245         /* Actually do the REMOVE operation */
4246         if (vp->v_type == VDIR) {
4247                 /*
4248                  * Can't remove a directory that has a mounted-on filesystem.
4249                  */
4250                 if (vn_ismntpt(vp)) {
4251                         error = EACCES;
4252                 } else {
4253                         /*
4254                          * System V defines rmdir to return EEXIST,
4255                          * not ENOTEMPTY, if the directory is not
4256                          * empty.  A System V NFS server needs to map
4257                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4258                          * transmit over the wire.
4259                          */
4260                         if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4261                             NULL, 0)) == EEXIST)
4262                                 error = ENOTEMPTY;
4263                 }
4264         } else {
4265                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4266                     fp != NULL) {
4267                         struct vattr va;
4268                         vnode_t *tvp;
4269 
4270                         rfs4_dbe_lock(fp->rf_dbe);
4271                         tvp = fp->rf_vp;
4272                         if (tvp)
4273                                 VN_HOLD(tvp);
4274                         rfs4_dbe_unlock(fp->rf_dbe);
4275 
4276                         if (tvp) {
4277                                 /*
4278                                  * This is va_seq safe because we are not
4279                                  * manipulating dvp.
4280                                  */
4281                                 va.va_mask = AT_NLINK;
4282                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4283                                     va.va_nlink == 0) {
4284                                         /* Remove state on file remove */
4285                                         if (in_crit) {
4286                                                 nbl_end_crit(vp);
4287                                                 in_crit = 0;
4288                                         }
4289                                         rfs4_close_all_state(fp);
4290                                 }
4291                                 VN_RELE(tvp);
4292                         }
4293                 }
4294         }
4295 
4296         if (in_crit)
4297                 nbl_end_crit(vp);
4298         VN_RELE(vp);
4299 
4300         if (fp) {
4301                 rfs4_clear_dont_grant(fp);
4302                 rfs4_file_rele(fp);
4303         }
4304         if (nm != name)
4305                 kmem_free(name, MAXPATHLEN + 1);
4306         kmem_free(nm, len);
4307 
4308         if (error) {
4309                 *cs->statusp = resp->status = puterrno4(error);
4310                 goto out;
4311         }
4312 
4313         /*
4314          * Get the initial "after" sequence number, if it fails, set to zero
4315          */
4316         idva.va_mask = AT_SEQ;
4317         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4318                 idva.va_seq = 0;
4319 
4320         /*
4321          * Force modified data and metadata out to stable storage.
4322          */
4323         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4324 
4325         /*
4326          * Get "after" change value, if it fails, simply return the
4327          * before value.
4328          */
4329         adva.va_mask = AT_CTIME|AT_SEQ;
4330         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4331                 adva.va_ctime = bdva.va_ctime;
4332                 adva.va_seq = 0;
4333         }
4334 
4335         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4336 
4337         /*
4338          * The cinfo.atomic = TRUE only if we have
4339          * non-zero va_seq's, and it has incremented by exactly one
4340          * during the VOP_REMOVE/RMDIR and it didn't change during
4341          * the VOP_FSYNC.
4342          */
4343         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4344             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4345                 resp->cinfo.atomic = TRUE;
4346         else
4347                 resp->cinfo.atomic = FALSE;
4348 
4349         *cs->statusp = resp->status = NFS4_OK;
4350 
4351 out:
4352         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4353             REMOVE4res *, resp);
4354 }
4355 
4356 /*
4357  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4358  *              oldname and newname.
4359  *      res: status. If success - CURRENT_FH unchanged, return change_info
4360  *              for both from and target directories.
4361  */
4362 /* ARGSUSED */
4363 static void
4364 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4365     struct compound_state *cs)
4366 {
4367         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4368         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4369         int error;
4370         vnode_t *odvp;
4371         vnode_t *ndvp;
4372         vnode_t *srcvp, *targvp, *tvp;
4373         struct vattr obdva, oidva, oadva;
4374         struct vattr nbdva, nidva, nadva;
4375         char *onm, *nnm;
4376         uint_t olen, nlen;
4377         rfs4_file_t *fp, *sfp;
4378         int in_crit_src, in_crit_targ;
4379         int fp_rele_grant_hold, sfp_rele_grant_hold;
4380         int unlinked;
4381         bslabel_t *clabel;
4382         struct sockaddr *ca;
4383         char *converted_onm = NULL;
4384         char *converted_nnm = NULL;
4385         nfsstat4 status;
4386 
4387         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4388             RENAME4args *, args);
4389 
4390         fp = sfp = NULL;
4391         srcvp = targvp = tvp = NULL;
4392         in_crit_src = in_crit_targ = 0;
4393         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4394         unlinked = 0;
4395 
4396         /* CURRENT_FH: target directory */
4397         ndvp = cs->vp;
4398         if (ndvp == NULL) {
4399                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4400                 goto out;
4401         }
4402 
4403         /* SAVED_FH: from directory */
4404         odvp = cs->saved_vp;
4405         if (odvp == NULL) {
4406                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4407                 goto out;
4408         }
4409 
4410         if (cs->access == CS_ACCESS_DENIED) {
4411                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4412                 goto out;
4413         }
4414 
4415         /*
4416          * If there is an unshared filesystem mounted on this vnode,
4417          * do not allow to rename objects in this directory.
4418          */
4419         if (vn_ismntpt(odvp)) {
4420                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4421                 goto out;
4422         }
4423 
4424         /*
4425          * If there is an unshared filesystem mounted on this vnode,
4426          * do not allow to rename to this directory.
4427          */
4428         if (vn_ismntpt(ndvp)) {
4429                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4430                 goto out;
4431         }
4432 
4433         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4434                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4435                 goto out;
4436         }
4437 
4438         if (cs->saved_exi != cs->exi) {
4439                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4440                 goto out;
4441         }
4442 
4443         status = utf8_dir_verify(&args->oldname);
4444         if (status != NFS4_OK) {
4445                 *cs->statusp = resp->status = status;
4446                 goto out;
4447         }
4448 
4449         status = utf8_dir_verify(&args->newname);
4450         if (status != NFS4_OK) {
4451                 *cs->statusp = resp->status = status;
4452                 goto out;
4453         }
4454 
4455         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4456         if (onm == NULL) {
4457                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4458                 goto out;
4459         }
4460         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4461         nlen = MAXPATHLEN + 1;
4462         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4463             nlen);
4464 
4465         if (converted_onm == NULL) {
4466                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4467                 kmem_free(onm, olen);
4468                 goto out;
4469         }
4470 
4471         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4472         if (nnm == NULL) {
4473                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4474                 if (onm != converted_onm)
4475                         kmem_free(converted_onm, MAXPATHLEN + 1);
4476                 kmem_free(onm, olen);
4477                 goto out;
4478         }
4479         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4480             MAXPATHLEN  + 1);
4481 
4482         if (converted_nnm == NULL) {
4483                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4484                 kmem_free(nnm, nlen);
4485                 nnm = NULL;
4486                 if (onm != converted_onm)
4487                         kmem_free(converted_onm, MAXPATHLEN + 1);
4488                 kmem_free(onm, olen);
4489                 goto out;
4490         }
4491 
4492 
4493         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4494                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4495                 kmem_free(onm, olen);
4496                 kmem_free(nnm, nlen);
4497                 goto out;
4498         }
4499 
4500 
4501         if (rdonly4(req, cs)) {
4502                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4503                 if (onm != converted_onm)
4504                         kmem_free(converted_onm, MAXPATHLEN + 1);
4505                 kmem_free(onm, olen);
4506                 if (nnm != converted_nnm)
4507                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4508                 kmem_free(nnm, nlen);
4509                 goto out;
4510         }
4511 
4512         /* check label of the target dir */
4513         if (is_system_labeled()) {
4514                 ASSERT(req->rq_label != NULL);
4515                 clabel = req->rq_label;
4516                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4517                     "got client label from request(1)",
4518                     struct svc_req *, req);
4519                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4520                         if (!do_rfs_label_check(clabel, ndvp,
4521                             EQUALITY_CHECK, cs->exi)) {
4522                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4523                                 goto err_out;
4524                         }
4525                 }
4526         }
4527 
4528         /*
4529          * Is the source a file and have a delegation?
4530          * We don't need to acquire va_seq before these lookups, if
4531          * it causes an update, cinfo.before will not match, which will
4532          * trigger a cache flush even if atomic is TRUE.
4533          */
4534         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4535             &error, cs->cr)) {
4536                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4537                     NULL)) {
4538                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4539                         goto err_out;
4540                 }
4541         }
4542 
4543         if (srcvp == NULL) {
4544                 *cs->statusp = resp->status = puterrno4(error);
4545                 if (onm != converted_onm)
4546                         kmem_free(converted_onm, MAXPATHLEN + 1);
4547                 kmem_free(onm, olen);
4548                 if (nnm != converted_nnm)
4549                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4550                 kmem_free(nnm, nlen);
4551                 goto out;
4552         }
4553 
4554         sfp_rele_grant_hold = 1;
4555 
4556         /* Does the destination exist and a file and have a delegation? */
4557         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4558             NULL, cs->cr)) {
4559                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4560                     NULL)) {
4561                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4562                         goto err_out;
4563                 }
4564         }
4565         fp_rele_grant_hold = 1;
4566 
4567         /* Check for NBMAND lock on both source and target */
4568         if (nbl_need_check(srcvp)) {
4569                 nbl_start_crit(srcvp, RW_READER);
4570                 in_crit_src = 1;
4571                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4572                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4573                         goto err_out;
4574                 }
4575         }
4576 
4577         if (targvp && nbl_need_check(targvp)) {
4578                 nbl_start_crit(targvp, RW_READER);
4579                 in_crit_targ = 1;
4580                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4581                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4582                         goto err_out;
4583                 }
4584         }
4585 
4586         /* Get source "before" change value */
4587         obdva.va_mask = AT_CTIME|AT_SEQ;
4588         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4589         if (!error) {
4590                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4591                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4592         }
4593         if (error) {
4594                 *cs->statusp = resp->status = puterrno4(error);
4595                 goto err_out;
4596         }
4597 
4598         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4599         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4600 
4601         error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4602             NULL, 0);
4603 
4604         /*
4605          * If target existed and was unlinked by VOP_RENAME, state will need
4606          * closed. To avoid deadlock, rfs4_close_all_state will be done after
4607          * any necessary nbl_end_crit on srcvp and tgtvp.
4608          */
4609         if (error == 0 && fp != NULL) {
4610                 rfs4_dbe_lock(fp->rf_dbe);
4611                 tvp = fp->rf_vp;
4612                 if (tvp)
4613                         VN_HOLD(tvp);
4614                 rfs4_dbe_unlock(fp->rf_dbe);
4615 
4616                 if (tvp) {
4617                         struct vattr va;
4618                         va.va_mask = AT_NLINK;
4619 
4620                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4621                             va.va_nlink == 0) {
4622                                 unlinked = 1;
4623 
4624                                 /* DEBUG data */
4625                                 if ((srcvp == targvp) || (tvp != targvp)) {
4626                                         cmn_err(CE_WARN, "rfs4_op_rename: "
4627                                             "srcvp %p, targvp: %p, tvp: %p",
4628                                             (void *)srcvp, (void *)targvp,
4629                                             (void *)tvp);
4630                                 }
4631                         } else {
4632                                 VN_RELE(tvp);
4633                         }
4634                 }
4635         }
4636         if (error == 0)
4637                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4638 
4639         if (in_crit_src)
4640                 nbl_end_crit(srcvp);
4641         if (srcvp)
4642                 VN_RELE(srcvp);
4643         if (in_crit_targ)
4644                 nbl_end_crit(targvp);
4645         if (targvp)
4646                 VN_RELE(targvp);
4647 
4648         if (unlinked) {
4649                 ASSERT(fp != NULL);
4650                 ASSERT(tvp != NULL);
4651 
4652                 /* DEBUG data */
4653                 if (RW_READ_HELD(&tvp->v_nbllock)) {
4654                         cmn_err(CE_WARN, "rfs4_op_rename: "
4655                             "RW_READ_HELD(%p)", (void *)tvp);
4656                 }
4657 
4658                 /* The file is gone and so should the state */
4659                 rfs4_close_all_state(fp);
4660                 VN_RELE(tvp);
4661         }
4662 
4663         if (sfp) {
4664                 rfs4_clear_dont_grant(sfp);
4665                 rfs4_file_rele(sfp);
4666         }
4667         if (fp) {
4668                 rfs4_clear_dont_grant(fp);
4669                 rfs4_file_rele(fp);
4670         }
4671 
4672         if (converted_onm != onm)
4673                 kmem_free(converted_onm, MAXPATHLEN + 1);
4674         kmem_free(onm, olen);
4675         if (converted_nnm != nnm)
4676                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4677         kmem_free(nnm, nlen);
4678 
4679         /*
4680          * Get the initial "after" sequence number, if it fails, set to zero
4681          */
4682         oidva.va_mask = AT_SEQ;
4683         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4684                 oidva.va_seq = 0;
4685 
4686         nidva.va_mask = AT_SEQ;
4687         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4688                 nidva.va_seq = 0;
4689 
4690         /*
4691          * Force modified data and metadata out to stable storage.
4692          */
4693         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4694         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4695 
4696         if (error) {
4697                 *cs->statusp = resp->status = puterrno4(error);
4698                 goto out;
4699         }
4700 
4701         /*
4702          * Get "after" change values, if it fails, simply return the
4703          * before value.
4704          */
4705         oadva.va_mask = AT_CTIME|AT_SEQ;
4706         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4707                 oadva.va_ctime = obdva.va_ctime;
4708                 oadva.va_seq = 0;
4709         }
4710 
4711         nadva.va_mask = AT_CTIME|AT_SEQ;
4712         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4713                 nadva.va_ctime = nbdva.va_ctime;
4714                 nadva.va_seq = 0;
4715         }
4716 
4717         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4718         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4719 
4720         /*
4721          * The cinfo.atomic = TRUE only if we have
4722          * non-zero va_seq's, and it has incremented by exactly one
4723          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4724          */
4725         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4726             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4727                 resp->source_cinfo.atomic = TRUE;
4728         else
4729                 resp->source_cinfo.atomic = FALSE;
4730 
4731         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4732             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4733                 resp->target_cinfo.atomic = TRUE;
4734         else
4735                 resp->target_cinfo.atomic = FALSE;
4736 
4737 #ifdef  VOLATILE_FH_TEST
4738         {
4739         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4740 
4741         /*
4742          * Add the renamed file handle to the volatile rename list
4743          */
4744         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4745                 /* file handles may expire on rename */
4746                 vnode_t *vp;
4747 
4748                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4749                 /*
4750                  * Already know that nnm will be a valid string
4751                  */
4752                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4753                     NULL, NULL, NULL);
4754                 kmem_free(nnm, nlen);
4755                 if (!error) {
4756                         add_volrnm_fh(cs->exi, vp);
4757                         VN_RELE(vp);
4758                 }
4759         }
4760         }
4761 #endif  /* VOLATILE_FH_TEST */
4762 
4763         *cs->statusp = resp->status = NFS4_OK;
4764 out:
4765         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4766             RENAME4res *, resp);
4767         return;
4768 
4769 err_out:
4770         if (onm != converted_onm)
4771                 kmem_free(converted_onm, MAXPATHLEN + 1);
4772         if (onm != NULL)
4773                 kmem_free(onm, olen);
4774         if (nnm != converted_nnm)
4775                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4776         if (nnm != NULL)
4777                 kmem_free(nnm, nlen);
4778 
4779         if (in_crit_src) nbl_end_crit(srcvp);
4780         if (in_crit_targ) nbl_end_crit(targvp);
4781         if (targvp) VN_RELE(targvp);
4782         if (srcvp) VN_RELE(srcvp);
4783         if (sfp) {
4784                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4785                 rfs4_file_rele(sfp);
4786         }
4787         if (fp) {
4788                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4789                 rfs4_file_rele(fp);
4790         }
4791 
4792         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4793             RENAME4res *, resp);
4794 }
4795 
4796 /* ARGSUSED */
4797 static void
4798 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4799     struct compound_state *cs)
4800 {
4801         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4802         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4803         rfs4_client_t *cp;
4804 
4805         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4806             RENEW4args *, args);
4807 
4808         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4809                 *cs->statusp = resp->status =
4810                     rfs4_check_clientid(&args->clientid, 0);
4811                 goto out;
4812         }
4813 
4814         if (rfs4_lease_expired(cp)) {
4815                 rfs4_client_rele(cp);
4816                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4817                 goto out;
4818         }
4819 
4820         rfs4_update_lease(cp);
4821 
4822         mutex_enter(cp->rc_cbinfo.cb_lock);
4823         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4824                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4825                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4826         } else {
4827                 *cs->statusp = resp->status = NFS4_OK;
4828         }
4829         mutex_exit(cp->rc_cbinfo.cb_lock);
4830 
4831         rfs4_client_rele(cp);
4832 
4833 out:
4834         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4835             RENEW4res *, resp);
4836 }
4837 
4838 /* ARGSUSED */
4839 static void
4840 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4841     struct compound_state *cs)
4842 {
4843         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4844 
4845         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4846 
4847         /* No need to check cs->access - we are not accessing any object */
4848         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4849                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4850                 goto out;
4851         }
4852         if (cs->vp != NULL) {
4853                 VN_RELE(cs->vp);
4854         }
4855         cs->vp = cs->saved_vp;
4856         cs->saved_vp = NULL;
4857         cs->exi = cs->saved_exi;
4858         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4859         *cs->statusp = resp->status = NFS4_OK;
4860         cs->deleg = FALSE;
4861 
4862 out:
4863         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4864             RESTOREFH4res *, resp);
4865 }
4866 
4867 /* ARGSUSED */
4868 static void
4869 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4870     struct compound_state *cs)
4871 {
4872         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4873 
4874         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4875 
4876         /* No need to check cs->access - we are not accessing any object */
4877         if (cs->vp == NULL) {
4878                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4879                 goto out;
4880         }
4881         if (cs->saved_vp != NULL) {
4882                 VN_RELE(cs->saved_vp);
4883         }
4884         cs->saved_vp = cs->vp;
4885         VN_HOLD(cs->saved_vp);
4886         cs->saved_exi = cs->exi;
4887         /*
4888          * since SAVEFH is fairly rare, don't alloc space for its fh
4889          * unless necessary.
4890          */
4891         if (cs->saved_fh.nfs_fh4_val == NULL) {
4892                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4893         }
4894         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4895         *cs->statusp = resp->status = NFS4_OK;
4896 
4897 out:
4898         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4899             SAVEFH4res *, resp);
4900 }
4901 
4902 /*
4903  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4904  * return the bitmap of attrs that were set successfully. It is also
4905  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4906  * always be called only after rfs4_do_set_attrs().
4907  *
4908  * Verify that the attributes are same as the expected ones. sargp->vap
4909  * and sargp->sbp contain the input attributes as translated from fattr4.
4910  *
4911  * This function verifies only the attrs that correspond to a vattr or
4912  * vfsstat struct. That is because of the extra step needed to get the
4913  * corresponding system structs. Other attributes have already been set or
4914  * verified by do_rfs4_set_attrs.
4915  *
4916  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4917  */
4918 static int
4919 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4920     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4921 {
4922         int error, ret_error = 0;
4923         int i, k;
4924         uint_t sva_mask = sargp->vap->va_mask;
4925         uint_t vbit;
4926         union nfs4_attr_u *na;
4927         uint8_t *amap;
4928         bool_t getsb = ntovp->vfsstat;
4929 
4930         if (sva_mask != 0) {
4931                 /*
4932                  * Okay to overwrite sargp->vap because we verify based
4933                  * on the incoming values.
4934                  */
4935                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4936                     sargp->cs->cr, NULL);
4937                 if (ret_error) {
4938                         if (resp == NULL)
4939                                 return (ret_error);
4940                         /*
4941                          * Must return bitmap of successful attrs
4942                          */
4943                         sva_mask = 0;   /* to prevent checking vap later */
4944                 } else {
4945                         /*
4946                          * Some file systems clobber va_mask. it is probably
4947                          * wrong of them to do so, nonethless we practice
4948                          * defensive coding.
4949                          * See bug id 4276830.
4950                          */
4951                         sargp->vap->va_mask = sva_mask;
4952                 }
4953         }
4954 
4955         if (getsb) {
4956                 /*
4957                  * Now get the superblock and loop on the bitmap, as there is
4958                  * no simple way of translating from superblock to bitmap4.
4959                  */
4960                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4961                 if (ret_error) {
4962                         if (resp == NULL)
4963                                 goto errout;
4964                         getsb = FALSE;
4965                 }
4966         }
4967 
4968         /*
4969          * Now loop and verify each attribute which getattr returned
4970          * whether it's the same as the input.
4971          */
4972         if (resp == NULL && !getsb && (sva_mask == 0))
4973                 goto errout;
4974 
4975         na = ntovp->na;
4976         amap = ntovp->amap;
4977         k = 0;
4978         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4979                 k = *amap;
4980                 ASSERT(nfs4_ntov_map[k].nval == k);
4981                 vbit = nfs4_ntov_map[k].vbit;
4982 
4983                 /*
4984                  * If vattr attribute but VOP_GETATTR failed, or it's
4985                  * superblock attribute but VFS_STATVFS failed, skip
4986                  */
4987                 if (vbit) {
4988                         if ((vbit & sva_mask) == 0)
4989                                 continue;
4990                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4991                         continue;
4992                 }
4993                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4994                 if (resp != NULL) {
4995                         if (error)
4996                                 ret_error = -1; /* not all match */
4997                         else    /* update response bitmap */
4998                                 *resp |= nfs4_ntov_map[k].fbit;
4999                         continue;
5000                 }
5001                 if (error) {
5002                         ret_error = -1; /* not all match */
5003                         break;
5004                 }
5005         }
5006 errout:
5007         return (ret_error);
5008 }
5009 
5010 /*
5011  * Decode the attribute to be set/verified. If the attr requires a sys op
5012  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5013  * call the sv_getit function for it, because the sys op hasn't yet been done.
5014  * Return 0 for success, error code if failed.
5015  *
5016  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5017  */
5018 static int
5019 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5020     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5021 {
5022         int error = 0;
5023         bool_t set_later;
5024 
5025         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5026 
5027         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5028                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5029                 /*
5030                  * don't verify yet if a vattr or sb dependent attr,
5031                  * because we don't have their sys values yet.
5032                  * Will be done later.
5033                  */
5034                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5035                         /*
5036                          * ACLs are a special case, since setting the MODE
5037                          * conflicts with setting the ACL.  We delay setting
5038                          * the ACL until all other attributes have been set.
5039                          * The ACL gets set in do_rfs4_op_setattr().
5040                          */
5041                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5042                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5043                                     sargp, nap);
5044                                 if (error) {
5045                                         xdr_free(nfs4_ntov_map[k].xfunc,
5046                                             (caddr_t)nap);
5047                                 }
5048                         }
5049                 }
5050         } else {
5051 #ifdef  DEBUG
5052                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5053                     "decoding attribute %d\n", k);
5054 #endif
5055                 error = EINVAL;
5056         }
5057         if (!error && resp_bval && !set_later) {
5058                 *resp_bval |= nfs4_ntov_map[k].fbit;
5059         }
5060 
5061         return (error);
5062 }
5063 
5064 /*
5065  * Set vattr based on incoming fattr4 attrs - used by setattr.
5066  * Set response mask. Ignore any values that are not writable vattr attrs.
5067  */
5068 static nfsstat4
5069 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5070     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5071     nfs4_attr_cmd_t cmd)
5072 {
5073         int error = 0;
5074         int i;
5075         char *attrs = fattrp->attrlist4;
5076         uint32_t attrslen = fattrp->attrlist4_len;
5077         XDR xdr;
5078         nfsstat4 status = NFS4_OK;
5079         vnode_t *vp = cs->vp;
5080         union nfs4_attr_u *na;
5081         uint8_t *amap;
5082 
5083 #ifndef lint
5084         /*
5085          * Make sure that maximum attribute number can be expressed as an
5086          * 8 bit quantity.
5087          */
5088         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5089 #endif
5090 
5091         if (vp == NULL) {
5092                 if (resp)
5093                         *resp = 0;
5094                 return (NFS4ERR_NOFILEHANDLE);
5095         }
5096         if (cs->access == CS_ACCESS_DENIED) {
5097                 if (resp)
5098                         *resp = 0;
5099                 return (NFS4ERR_ACCESS);
5100         }
5101 
5102         sargp->op = cmd;
5103         sargp->cs = cs;
5104         sargp->flag = 0;     /* may be set later */
5105         sargp->vap->va_mask = 0;
5106         sargp->rdattr_error = NFS4_OK;
5107         sargp->rdattr_error_req = FALSE;
5108         /* sargp->sbp is set by the caller */
5109 
5110         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5111 
5112         na = ntovp->na;
5113         amap = ntovp->amap;
5114 
5115         /*
5116          * The following loop iterates on the nfs4_ntov_map checking
5117          * if the fbit is set in the requested bitmap.
5118          * If set then we process the arguments using the
5119          * rfs4_fattr4 conversion functions to populate the setattr
5120          * vattr and va_mask. Any settable attrs that are not using vattr
5121          * will be set in this loop.
5122          */
5123         for (i = 0; i < nfs4_ntov_map_size; i++) {
5124                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5125                         continue;
5126                 }
5127                 /*
5128                  * If setattr, must be a writable attr.
5129                  * If verify/nverify, must be a readable attr.
5130                  */
5131                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5132                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5133                         /*
5134                          * Client tries to set/verify an
5135                          * unsupported attribute, tries to set
5136                          * a read only attr or verify a write
5137                          * only one - error!
5138                          */
5139                         break;
5140                 }
5141                 /*
5142                  * Decode the attribute to set/verify
5143                  */
5144                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5145                     &xdr, resp ? resp : NULL, na);
5146                 if (error)
5147                         break;
5148                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5149                 na++;
5150                 (ntovp->attrcnt)++;
5151                 if (nfs4_ntov_map[i].vfsstat)
5152                         ntovp->vfsstat = TRUE;
5153         }
5154 
5155         if (error != 0)
5156                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5157                     puterrno4(error));
5158         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5159         return (status);
5160 }
5161 
5162 static nfsstat4
5163 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5164     stateid4 *stateid)
5165 {
5166         int error = 0;
5167         struct nfs4_svgetit_arg sarg;
5168         bool_t trunc;
5169 
5170         nfsstat4 status = NFS4_OK;
5171         cred_t *cr = cs->cr;
5172         vnode_t *vp = cs->vp;
5173         struct nfs4_ntov_table ntov;
5174         struct statvfs64 sb;
5175         struct vattr bva;
5176         struct flock64 bf;
5177         int in_crit = 0;
5178         uint_t saved_mask = 0;
5179         caller_context_t ct;
5180 
5181         *resp = 0;
5182         sarg.sbp = &sb;
5183         sarg.is_referral = B_FALSE;
5184         nfs4_ntov_table_init(&ntov);
5185         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5186             NFS4ATTR_SETIT);
5187         if (status != NFS4_OK) {
5188                 /*
5189                  * failed set attrs
5190                  */
5191                 goto done;
5192         }
5193         if ((sarg.vap->va_mask == 0) &&
5194             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5195                 /*
5196                  * no further work to be done
5197                  */
5198                 goto done;
5199         }
5200 
5201         /*
5202          * If we got a request to set the ACL and the MODE, only
5203          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5204          * to change any other bits, along with setting an ACL,
5205          * gives NFS4ERR_INVAL.
5206          */
5207         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5208             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5209                 vattr_t va;
5210 
5211                 va.va_mask = AT_MODE;
5212                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5213                 if (error) {
5214                         status = puterrno4(error);
5215                         goto done;
5216                 }
5217                 if ((sarg.vap->va_mode ^ va.va_mode) &
5218                     ~(VSUID | VSGID | VSVTX)) {
5219                         status = NFS4ERR_INVAL;
5220                         goto done;
5221                 }
5222         }
5223 
5224         /* Check stateid only if size has been set */
5225         if (sarg.vap->va_mask & AT_SIZE) {
5226                 trunc = (sarg.vap->va_size == 0);
5227                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5228                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5229                 if (status != NFS4_OK)
5230                         goto done;
5231         } else {
5232                 ct.cc_sysid = 0;
5233                 ct.cc_pid = 0;
5234                 ct.cc_caller_id = nfs4_srv_caller_id;
5235                 ct.cc_flags = CC_DONTBLOCK;
5236         }
5237 
5238         /* XXX start of possible race with delegations */
5239 
5240         /*
5241          * We need to specially handle size changes because it is
5242          * possible for the client to create a file with read-only
5243          * modes, but with the file opened for writing. If the client
5244          * then tries to set the file size, e.g. ftruncate(3C),
5245          * fcntl(F_FREESP), the normal access checking done in
5246          * VOP_SETATTR would prevent the client from doing it even though
5247          * it should be allowed to do so.  To get around this, we do the
5248          * access checking for ourselves and use VOP_SPACE which doesn't
5249          * do the access checking.
5250          * Also the client should not be allowed to change the file
5251          * size if there is a conflicting non-blocking mandatory lock in
5252          * the region of the change.
5253          */
5254         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5255                 u_offset_t offset;
5256                 ssize_t length;
5257 
5258                 /*
5259                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5260                  * before returning, sarg.vap->va_mask is used to
5261                  * generate the setattr reply bitmap.  We also clear
5262                  * AT_SIZE below before calling VOP_SPACE.  For both
5263                  * of these cases, the va_mask needs to be saved here
5264                  * and restored after calling VOP_SETATTR.
5265                  */
5266                 saved_mask = sarg.vap->va_mask;
5267 
5268                 /*
5269                  * Check any possible conflict due to NBMAND locks.
5270                  * Get into critical region before VOP_GETATTR, so the
5271                  * size attribute is valid when checking conflicts.
5272                  */
5273                 if (nbl_need_check(vp)) {
5274                         nbl_start_crit(vp, RW_READER);
5275                         in_crit = 1;
5276                 }
5277 
5278                 bva.va_mask = AT_UID|AT_SIZE;
5279                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5280                         status = puterrno4(error);
5281                         goto done;
5282                 }
5283 
5284                 if (in_crit) {
5285                         if (sarg.vap->va_size < bva.va_size) {
5286                                 offset = sarg.vap->va_size;
5287                                 length = bva.va_size - sarg.vap->va_size;
5288                         } else {
5289                                 offset = bva.va_size;
5290                                 length = sarg.vap->va_size - bva.va_size;
5291                         }
5292                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5293                             &ct)) {
5294                                 status = NFS4ERR_LOCKED;
5295                                 goto done;
5296                         }
5297                 }
5298 
5299                 if (crgetuid(cr) == bva.va_uid) {
5300                         sarg.vap->va_mask &= ~AT_SIZE;
5301                         bf.l_type = F_WRLCK;
5302                         bf.l_whence = 0;
5303                         bf.l_start = (off64_t)sarg.vap->va_size;
5304                         bf.l_len = 0;
5305                         bf.l_sysid = 0;
5306                         bf.l_pid = 0;
5307                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5308                             (offset_t)sarg.vap->va_size, cr, &ct);
5309                 }
5310         }
5311 
5312         if (!error && sarg.vap->va_mask != 0)
5313                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5314 
5315         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5316         if (saved_mask & AT_SIZE)
5317                 sarg.vap->va_mask |= AT_SIZE;
5318 
5319         /*
5320          * If an ACL was being set, it has been delayed until now,
5321          * in order to set the mode (via the VOP_SETATTR() above) first.
5322          */
5323         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5324                 int i;
5325 
5326                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5327                         if (ntov.amap[i] == FATTR4_ACL)
5328                                 break;
5329                 if (i < NFS4_MAXNUM_ATTRS) {
5330                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5331                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5332                         if (error == 0) {
5333                                 *resp |= FATTR4_ACL_MASK;
5334                         } else if (error == ENOTSUP) {
5335                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5336                                 status = NFS4ERR_ATTRNOTSUPP;
5337                                 goto done;
5338                         }
5339                 } else {
5340                         NFS4_DEBUG(rfs4_debug,
5341                             (CE_NOTE, "do_rfs4_op_setattr: "
5342                             "unable to find ACL in fattr4"));
5343                         error = EINVAL;
5344                 }
5345         }
5346 
5347         if (error) {
5348                 /* check if a monitor detected a delegation conflict */
5349                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5350                         status = NFS4ERR_DELAY;
5351                 else
5352                         status = puterrno4(error);
5353 
5354                 /*
5355                  * Set the response bitmap when setattr failed.
5356                  * If VOP_SETATTR partially succeeded, test by doing a
5357                  * VOP_GETATTR on the object and comparing the data
5358                  * to the setattr arguments.
5359                  */
5360                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5361         } else {
5362                 /*
5363                  * Force modified metadata out to stable storage.
5364                  */
5365                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5366                 /*
5367                  * Set response bitmap
5368                  */
5369                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5370         }
5371 
5372 /* Return early and already have a NFSv4 error */
5373 done:
5374         /*
5375          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5376          * conversion sets both readable and writeable NFS4 attrs
5377          * for AT_MTIME and AT_ATIME.  The line below masks out
5378          * unrequested attrs from the setattr result bitmap.  This
5379          * is placed after the done: label to catch the ATTRNOTSUP
5380          * case.
5381          */
5382         *resp &= fattrp->attrmask;
5383 
5384         if (in_crit)
5385                 nbl_end_crit(vp);
5386 
5387         nfs4_ntov_table_free(&ntov, &sarg);
5388 
5389         return (status);
5390 }
5391 
5392 /* ARGSUSED */
5393 static void
5394 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5395     struct compound_state *cs)
5396 {
5397         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5398         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5399         bslabel_t *clabel;
5400 
5401         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5402             SETATTR4args *, args);
5403 
5404         if (cs->vp == NULL) {
5405                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5406                 goto out;
5407         }
5408 
5409         /*
5410          * If there is an unshared filesystem mounted on this vnode,
5411          * do not allow to setattr on this vnode.
5412          */
5413         if (vn_ismntpt(cs->vp)) {
5414                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5415                 goto out;
5416         }
5417 
5418         resp->attrsset = 0;
5419 
5420         if (rdonly4(req, cs)) {
5421                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5422                 goto out;
5423         }
5424 
5425         /* check label before setting attributes */
5426         if (is_system_labeled()) {
5427                 ASSERT(req->rq_label != NULL);
5428                 clabel = req->rq_label;
5429                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5430                     "got client label from request(1)",
5431                     struct svc_req *, req);
5432                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5433                         if (!do_rfs_label_check(clabel, cs->vp,
5434                             EQUALITY_CHECK, cs->exi)) {
5435                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5436                                 goto out;
5437                         }
5438                 }
5439         }
5440 
5441         *cs->statusp = resp->status =
5442             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5443             &args->stateid);
5444 
5445 out:
5446         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5447             SETATTR4res *, resp);
5448 }
5449 
5450 /* ARGSUSED */
5451 static void
5452 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5453     struct compound_state *cs)
5454 {
5455         /*
5456          * verify and nverify are exactly the same, except that nverify
5457          * succeeds when some argument changed, and verify succeeds when
5458          * when none changed.
5459          */
5460 
5461         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5462         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5463 
5464         int error;
5465         struct nfs4_svgetit_arg sarg;
5466         struct statvfs64 sb;
5467         struct nfs4_ntov_table ntov;
5468 
5469         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5470             VERIFY4args *, args);
5471 
5472         if (cs->vp == NULL) {
5473                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5474                 goto out;
5475         }
5476 
5477         sarg.sbp = &sb;
5478         sarg.is_referral = B_FALSE;
5479         nfs4_ntov_table_init(&ntov);
5480         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5481             &sarg, &ntov, NFS4ATTR_VERIT);
5482         if (resp->status != NFS4_OK) {
5483                 /*
5484                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5485                  * so could return -1 for "no match".
5486                  */
5487                 if (resp->status == -1)
5488                         resp->status = NFS4ERR_NOT_SAME;
5489                 goto done;
5490         }
5491         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5492         switch (error) {
5493         case 0:
5494                 resp->status = NFS4_OK;
5495                 break;
5496         case -1:
5497                 resp->status = NFS4ERR_NOT_SAME;
5498                 break;
5499         default:
5500                 resp->status = puterrno4(error);
5501                 break;
5502         }
5503 done:
5504         *cs->statusp = resp->status;
5505         nfs4_ntov_table_free(&ntov, &sarg);
5506 out:
5507         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5508             VERIFY4res *, resp);
5509 }
5510 
5511 /* ARGSUSED */
5512 static void
5513 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5514     struct compound_state *cs)
5515 {
5516         /*
5517          * verify and nverify are exactly the same, except that nverify
5518          * succeeds when some argument changed, and verify succeeds when
5519          * when none changed.
5520          */
5521 
5522         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5523         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5524 
5525         int error;
5526         struct nfs4_svgetit_arg sarg;
5527         struct statvfs64 sb;
5528         struct nfs4_ntov_table ntov;
5529 
5530         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5531             NVERIFY4args *, args);
5532 
5533         if (cs->vp == NULL) {
5534                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5535                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5536                     NVERIFY4res *, resp);
5537                 return;
5538         }
5539         sarg.sbp = &sb;
5540         sarg.is_referral = B_FALSE;
5541         nfs4_ntov_table_init(&ntov);
5542         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5543             &sarg, &ntov, NFS4ATTR_VERIT);
5544         if (resp->status != NFS4_OK) {
5545                 /*
5546                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5547                  * so could return -1 for "no match".
5548                  */
5549                 if (resp->status == -1)
5550                         resp->status = NFS4_OK;
5551                 goto done;
5552         }
5553         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5554         switch (error) {
5555         case 0:
5556                 resp->status = NFS4ERR_SAME;
5557                 break;
5558         case -1:
5559                 resp->status = NFS4_OK;
5560                 break;
5561         default:
5562                 resp->status = puterrno4(error);
5563                 break;
5564         }
5565 done:
5566         *cs->statusp = resp->status;
5567         nfs4_ntov_table_free(&ntov, &sarg);
5568 
5569         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5570             NVERIFY4res *, resp);
5571 }
5572 
5573 /*
5574  * XXX - This should live in an NFS header file.
5575  */
5576 #define MAX_IOVECS      12
5577 
5578 /* ARGSUSED */
5579 static void
5580 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5581     struct compound_state *cs)
5582 {
5583         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5584         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5585         int error;
5586         vnode_t *vp;
5587         struct vattr bva;
5588         u_offset_t rlimit;
5589         struct uio uio;
5590         struct iovec iov[MAX_IOVECS];
5591         struct iovec *iovp;
5592         int iovcnt;
5593         int ioflag;
5594         cred_t *savecred, *cr;
5595         bool_t *deleg = &cs->deleg;
5596         nfsstat4 stat;
5597         int in_crit = 0;
5598         caller_context_t ct;
5599         nfs4_srv_t *nsrv4;
5600 
5601         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5602             WRITE4args *, args);
5603 
5604         vp = cs->vp;
5605         if (vp == NULL) {
5606                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5607                 goto out;
5608         }
5609         if (cs->access == CS_ACCESS_DENIED) {
5610                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5611                 goto out;
5612         }
5613 
5614         cr = cs->cr;
5615 
5616         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5617             deleg, TRUE, &ct)) != NFS4_OK) {
5618                 *cs->statusp = resp->status = stat;
5619                 goto out;
5620         }
5621 
5622         /*
5623          * We have to enter the critical region before calling VOP_RWLOCK
5624          * to avoid a deadlock with ufs.
5625          */
5626         if (nbl_need_check(vp)) {
5627                 nbl_start_crit(vp, RW_READER);
5628                 in_crit = 1;
5629                 if (nbl_conflict(vp, NBL_WRITE,
5630                     args->offset, args->data_len, 0, &ct)) {
5631                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5632                         goto out;
5633                 }
5634         }
5635 
5636         bva.va_mask = AT_MODE | AT_UID;
5637         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5638 
5639         /*
5640          * If we can't get the attributes, then we can't do the
5641          * right access checking.  So, we'll fail the request.
5642          */
5643         if (error) {
5644                 *cs->statusp = resp->status = puterrno4(error);
5645                 goto out;
5646         }
5647 
5648         if (rdonly4(req, cs)) {
5649                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5650                 goto out;
5651         }
5652 
5653         if (vp->v_type != VREG) {
5654                 *cs->statusp = resp->status =
5655                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5656                 goto out;
5657         }
5658 
5659         if (crgetuid(cr) != bva.va_uid &&
5660             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5661                 *cs->statusp = resp->status = puterrno4(error);
5662                 goto out;
5663         }
5664 
5665         if (MANDLOCK(vp, bva.va_mode)) {
5666                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5667                 goto out;
5668         }
5669 
5670         nsrv4 = nfs4_get_srv();
5671         if (args->data_len == 0) {
5672                 *cs->statusp = resp->status = NFS4_OK;
5673                 resp->count = 0;
5674                 resp->committed = args->stable;
5675                 resp->writeverf = nsrv4->write4verf;
5676                 goto out;
5677         }
5678 
5679         if (args->mblk != NULL) {
5680                 mblk_t *m;
5681                 uint_t bytes, round_len;
5682 
5683                 iovcnt = 0;
5684                 bytes = 0;
5685                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5686                 for (m = args->mblk;
5687                     m != NULL && bytes < round_len;
5688                     m = m->b_cont) {
5689                         iovcnt++;
5690                         bytes += MBLKL(m);
5691                 }
5692 #ifdef DEBUG
5693                 /* should have ended on an mblk boundary */
5694                 if (bytes != round_len) {
5695                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5696                             bytes, round_len, args->data_len);
5697                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5698                             (void *)args->mblk, (void *)m);
5699                         ASSERT(bytes == round_len);
5700                 }
5701 #endif
5702                 if (iovcnt <= MAX_IOVECS) {
5703                         iovp = iov;
5704                 } else {
5705                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5706                 }
5707                 mblk_to_iov(args->mblk, iovcnt, iovp);
5708         } else if (args->rlist != NULL) {
5709                 iovcnt = 1;
5710                 iovp = iov;
5711                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5712                 iovp->iov_len = args->data_len;
5713         } else {
5714                 iovcnt = 1;
5715                 iovp = iov;
5716                 iovp->iov_base = args->data_val;
5717                 iovp->iov_len = args->data_len;
5718         }
5719 
5720         uio.uio_iov = iovp;
5721         uio.uio_iovcnt = iovcnt;
5722 
5723         uio.uio_segflg = UIO_SYSSPACE;
5724         uio.uio_extflg = UIO_COPY_DEFAULT;
5725         uio.uio_loffset = args->offset;
5726         uio.uio_resid = args->data_len;
5727         uio.uio_llimit = curproc->p_fsz_ctl;
5728         rlimit = uio.uio_llimit - args->offset;
5729         if (rlimit < (u_offset_t)uio.uio_resid)
5730                 uio.uio_resid = (int)rlimit;
5731 
5732         if (args->stable == UNSTABLE4)
5733                 ioflag = 0;
5734         else if (args->stable == FILE_SYNC4)
5735                 ioflag = FSYNC;
5736         else if (args->stable == DATA_SYNC4)
5737                 ioflag = FDSYNC;
5738         else {
5739                 if (iovp != iov)
5740                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5741                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5742                 goto out;
5743         }
5744 
5745         /*
5746          * We're changing creds because VM may fault and we need
5747          * the cred of the current thread to be used if quota
5748          * checking is enabled.
5749          */
5750         savecred = curthread->t_cred;
5751         curthread->t_cred = cr;
5752         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5753         curthread->t_cred = savecred;
5754 
5755         if (iovp != iov)
5756                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5757 
5758         if (error) {
5759                 *cs->statusp = resp->status = puterrno4(error);
5760                 goto out;
5761         }
5762 
5763         *cs->statusp = resp->status = NFS4_OK;
5764         resp->count = args->data_len - uio.uio_resid;
5765 
5766         if (ioflag == 0)
5767                 resp->committed = UNSTABLE4;
5768         else
5769                 resp->committed = FILE_SYNC4;
5770 
5771         resp->writeverf = nsrv4->write4verf;
5772 
5773 out:
5774         if (in_crit)
5775                 nbl_end_crit(vp);
5776 
5777         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5778             WRITE4res *, resp);
5779 }
5780 
5781 
/*
 * XXX put in a header file
 *
 * Local prototype for the RPC security credential lookup.  In this file
 * rfs4_compound() calls it to fill in the cred_t plus the principal and
 * flavor fields of the compound state, and treats a return value of 0
 * as failure.
 */
extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5784 
/*
 * Execute one NFSv4 COMPOUND request.
 *
 *   args  decoded request: tag, minorversion and the array of operations
 *   resp  reply to fill in: a copy of the request tag, the overall status,
 *         and one nfs_resop4 per operation that was actually attempted
 *   exi   asserted to be NULL on entry
 *   req   RPC request; supplies the caller credential and transport, and
 *         has its rq_label freed (if set) once the operations have run
 *   cr    asserted to be NULL on entry; a fresh cred is obtained with
 *         crget() and stored as cs.basecr
 *   rv    optional.  Cleared to 0 on entry and set to 1 only when
 *         sec_svc_getcred() fails, in which case svcerr_badcred() has
 *         already been sent on the transport.
 *
 * The operations run in order under ne->exported_lock (held as reader)
 * and processing stops at the first one that leaves a status other than
 * NFS4_OK.  When that happens before the last operation, the result array
 * is reallocated so that resp->array_len matches the number of results
 * actually produced.
 *
 * NOTE: the empty-compound early return fires neither the compound__start
 * nor the compound__done DTrace probe; every other path fires both.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
        uint_t i;
        struct compound_state cs;
        nfs4_srv_t *nsrv4;
        nfs_export_t *ne = nfs_get_export();

        if (rv != NULL)
                *rv = 0;
        rfs4_init_compound_state(&cs);
        /*
         * Form a reply tag by copying over the request tag.
         */
        resp->tag.utf8string_val =
            kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
        resp->tag.utf8string_len = args->tag.utf8string_len;
        bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
            resp->tag.utf8string_len);

        cs.statusp = &resp->status;
        cs.req = req;
        /* No results yet; keeps the early-return paths below consistent. */
        resp->array = NULL;
        resp->array_len = 0;

        /*
         * XXX for now, minorversion should be zero
         */
        if (args->minorversion != NFS4_MINORVERSION) {
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                return;
        }

        /* An empty compound succeeds trivially with an empty result array. */
        if (args->array_len == 0) {
                resp->status = NFS4_OK;
                return;
        }

        ASSERT(exi == NULL);
        ASSERT(cr == NULL);

        cr = crget();
        ASSERT(cr != NULL);

        if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                crfree(cr);
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                svcerr_badcred(req->rq_xprt);
                /* Report the credential failure through *rv. */
                if (rv != NULL)
                        *rv = 1;
                return;
        }
        /* One zeroed result slot per requested operation. */
        resp->array_len = args->array_len;
        resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
            KM_SLEEP);

        cs.basecr = cr;
        nsrv4 = nfs4_get_srv();

        DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
            COMPOUND4args *, args);

        /*
         * For now, NFS4 compound processing must be protected by
         * exported_lock because it can access more than one exportinfo
         * per compound and share/unshare can now change multiple
         * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
         * per proc (excluding public exinfo), and exi_count design
         * is sufficient to protect concurrent execution of NFS2/3
         * ops along with unexport.  This lock will be removed as
         * part of the NFSv4 phase 2 namespace redesign work.
         */
        rw_enter(&ne->exported_lock, RW_READER);

        /*
         * If this is the first compound we've seen, we need to start all
         * new instances' grace periods.
         */
        if (nsrv4->seen_first_compound == 0) {
                rfs4_grace_start_new(nsrv4);
                /*
                 * This must be set after rfs4_grace_start_new(), otherwise
                 * another thread could proceed past here before the former
                 * is finished.
                 */
                nsrv4->seen_first_compound = 1;
        }

        /* Run the operations in order until one fails or all are done. */
        for (i = 0; i < args->array_len && cs.cont; i++) {
                nfs_argop4 *argop;
                nfs_resop4 *resop;
                uint_t op;

                argop = &args->array[i];
                resop = &resp->array[i];
                /* The result slot carries the same opcode as the request. */
                resop->resop = argop->argop;
                op = (uint_t)resop->resop;

                if (op < rfsv4disp_cnt) {
                        /*
                         * Count the individual ops here; NULL and COMPOUND
                         * are counted in common_dispatch()
                         */
                        rfsproccnt_v4_ptr[op].value.ui64++;

                        NFS4_DEBUG(rfs4_debug > 1,
                            (CE_NOTE, "Executing %s", rfs4_op_string[op]));
                        (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
                        NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
                            rfs4_op_string[op], *cs.statusp));
                        /* Any failing op terminates the compound. */
                        if (*cs.statusp != NFS4_OK)
                                cs.cont = FALSE;
                } else {
                        /*
                         * This is effectively dead code since XDR code
                         * will have already returned BADXDR if op doesn't
                         * decode to legal value.  This only done for a
                         * day when XDR code doesn't verify v4 opcodes.
                         */
                        op = OP_ILLEGAL;
                        rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

                        rfs4_op_illegal(argop, resop, req, &cs);
                        cs.cont = FALSE;
                }

                /*
                 * If not at last op, and if we are to stop, then
                 * compact the results array.
                 */
                if ((i + 1) < args->array_len && !cs.cont) {
                        nfs_resop4 *new_res = kmem_alloc(
                            (i+1) * sizeof (nfs_resop4), KM_SLEEP);
                        bcopy(resp->array,
                            new_res, (i+1) * sizeof (nfs_resop4));
                        /* The old array was sized for every requested op. */
                        kmem_free(resp->array,
                            args->array_len * sizeof (nfs_resop4));

                        resp->array_len =  i + 1;
                        resp->array = new_res;
                }
        }

        rw_exit(&ne->exported_lock);

        /*
         * clear exportinfo and vnode fields from compound_state before dtrace
         * probe, to avoid tracing residual values for path and share path.
         */
        if (cs.vp)
                VN_RELE(cs.vp);
        if (cs.saved_vp)
                VN_RELE(cs.saved_vp);
        cs.exi = cs.saved_exi = NULL;
        cs.vp = cs.saved_vp = NULL;

        DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
            COMPOUND4res *, resp);

        if (cs.saved_fh.nfs_fh4_val)
                kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

        /* Drop the credential references held in the compound state. */
        if (cs.basecr)
                crfree(cs.basecr);
        if (cs.cr)
                crfree(cs.cr);
        /*
         * done with this compound request, free the label
         */

        if (req->rq_label != NULL) {
                kmem_free(req->rq_label, sizeof (bslabel_t));
                req->rq_label = NULL;
        }
}
5968 
5969 /*
5970  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5971  * XXX zero out the tag and array values. Need to investigate why the
5972  * XXX calls occur, but at least prevent the panic for now.
5973  */
5974 void
5975 rfs4_compound_free(COMPOUND4res *resp)
5976 {
5977         uint_t i;
5978 
5979         if (resp->tag.utf8string_val) {
5980                 UTF8STRING_FREE(resp->tag)
5981         }
5982 
5983         for (i = 0; i < resp->array_len; i++) {
5984                 nfs_resop4 *resop;
5985                 uint_t op;
5986 
5987                 resop = &resp->array[i];
5988                 op = (uint_t)resop->resop;
5989                 if (op < rfsv4disp_cnt) {
5990                         (*rfsv4disptab[op].dis_resfree)(resop);
5991                 }
5992         }
5993         if (resp->array != NULL) {
5994                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5995         }
5996 }
5997 
5998 /*
5999  * Process the value of the compound request rpc flags, as a bit-AND
6000  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6001  */
6002 void
6003 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6004 {
6005         int i;
6006         int flag = RPC_ALL;
6007 
6008         for (i = 0; flag && i < args->array_len; i++) {
6009                 uint_t op;
6010 
6011                 op = (uint_t)args->array[i].argop;
6012 
6013                 if (op < rfsv4disp_cnt)
6014                         flag &= rfsv4disptab[op].dis_flags;
6015                 else
6016                         flag = 0;
6017         }
6018         *flagp = flag;
6019 }
6020 
6021 nfsstat4
6022 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6023 {
6024         nfsstat4 e;
6025 
6026         rfs4_dbe_lock(cp->rc_dbe);
6027 
6028         if (cp->rc_sysidt != LM_NOSYSID) {
6029                 *sp = cp->rc_sysidt;
6030                 e = NFS4_OK;
6031 
6032         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6033                 *sp = cp->rc_sysidt;
6034                 e = NFS4_OK;
6035 
6036                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6037                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6038         } else
6039                 e = NFS4ERR_DELAY;
6040 
6041         rfs4_dbe_unlock(cp->rc_dbe);
6042         return (e);
6043 }
6044 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: log one line via cmn_err(CE_NOTE) describing a
 * lock request.  'str' prefixes the message, 'operation' is the fcntl
 * command and 'flk' the lock description.  A zero l_len is printed as
 * all ones.  l_whence is asserted to be 0.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op_name;
        char *type_name;
        longlong_t length;

        /* Symbolic name of the fcntl command. */
        if (operation == F_GETLK)
                op_name = "F_GETLK";
        else if (operation == F_SETLK)
                op_name = "F_SETLK";
        else if (operation == F_SETLK_NBMAND)
                op_name = "F_SETLK_NBMAND";
        else
                op_name = "F_UNKNOWN";

        /* Symbolic name of the lock type. */
        if (flk->l_type == F_UNLCK)
                type_name = "F_UNLCK";
        else if (flk->l_type == F_RDLCK)
                type_name = "F_RDLCK";
        else if (flk->l_type == F_WRLCK)
                type_name = "F_WRLCK";
        else
                type_name = "F_UNKNOWN";

        ASSERT(flk->l_whence == 0);

        length = (flk->l_len != 0) ? (longlong_t)flk->l_len : ~0LL;
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op_name, type_name, (longlong_t)flk->l_start, length,
            flk->l_pid);
}

#define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6081 
/*
 * Unconditionally returns TRUE.  None of cr_set, req or cs is examined
 * (hence ARGSUSED).
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
        return (TRUE);
}
6088 
6089 /*
6090  * Look up the pathname using the vp in cs as the directory vnode.
6091  * cs->vp will be the vnode for the file on success
6092  */
6093 
6094 static nfsstat4
6095 rfs4_lookup(component4 *component, struct svc_req *req,
6096     struct compound_state *cs)
6097 {
6098         char *nm;
6099         uint32_t len;
6100         nfsstat4 status;
6101         struct sockaddr *ca;
6102         char *name;
6103 
6104         if (cs->vp == NULL) {
6105                 return (NFS4ERR_NOFILEHANDLE);
6106         }
6107         if (cs->vp->v_type != VDIR) {
6108                 return (NFS4ERR_NOTDIR);
6109         }
6110 
6111         status = utf8_dir_verify(component);
6112         if (status != NFS4_OK)
6113                 return (status);
6114 
6115         nm = utf8_to_fn(component, &len, NULL);
6116         if (nm == NULL) {
6117                 return (NFS4ERR_INVAL);
6118         }
6119 
6120         if (len > MAXNAMELEN) {
6121                 kmem_free(nm, len);
6122                 return (NFS4ERR_NAMETOOLONG);
6123         }
6124 
6125         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6126         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6127             MAXPATHLEN + 1);
6128 
6129         if (name == NULL) {
6130                 kmem_free(nm, len);
6131                 return (NFS4ERR_INVAL);
6132         }
6133 
6134         status = do_rfs4_op_lookup(name, req, cs);
6135 
6136         if (name != nm)
6137                 kmem_free(name, MAXPATHLEN + 1);
6138 
6139         kmem_free(nm, len);
6140 
6141         return (status);
6142 }
6143 
6144 static nfsstat4
6145 rfs4_lookupfile(component4 *component, struct svc_req *req,
6146     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6147 {
6148         nfsstat4 status;
6149         vnode_t *dvp = cs->vp;
6150         vattr_t bva, ava, fva;
6151         int error;
6152 
6153         /* Get "before" change value */
6154         bva.va_mask = AT_CTIME|AT_SEQ;
6155         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6156         if (error)
6157                 return (puterrno4(error));
6158 
6159         /* rfs4_lookup may VN_RELE directory */
6160         VN_HOLD(dvp);
6161 
6162         status = rfs4_lookup(component, req, cs);
6163         if (status != NFS4_OK) {
6164                 VN_RELE(dvp);
6165                 return (status);
6166         }
6167 
6168         /*
6169          * Get "after" change value, if it fails, simply return the
6170          * before value.
6171          */
6172         ava.va_mask = AT_CTIME|AT_SEQ;
6173         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6174                 ava.va_ctime = bva.va_ctime;
6175                 ava.va_seq = 0;
6176         }
6177         VN_RELE(dvp);
6178 
6179         /*
6180          * Validate the file is a file
6181          */
6182         fva.va_mask = AT_TYPE|AT_MODE;
6183         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6184         if (error)
6185                 return (puterrno4(error));
6186 
6187         if (fva.va_type != VREG) {
6188                 if (fva.va_type == VDIR)
6189                         return (NFS4ERR_ISDIR);
6190                 if (fva.va_type == VLNK)
6191                         return (NFS4ERR_SYMLINK);
6192                 return (NFS4ERR_INVAL);
6193         }
6194 
6195         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6196         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6197 
6198         /*
6199          * It is undefined if VOP_LOOKUP will change va_seq, so
6200          * cinfo.atomic = TRUE only if we have
6201          * non-zero va_seq's, and they have not changed.
6202          */
6203         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6204                 cinfo->atomic = TRUE;
6205         else
6206                 cinfo->atomic = FALSE;
6207 
6208         /* Check for mandatory locking */
6209         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6210         return (check_open_access(access, cs, req));
6211 }
6212 
6213 static nfsstat4
6214 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6215     cred_t *cr, vnode_t **vpp, bool_t *created)
6216 {
6217         int error;
6218         nfsstat4 status = NFS4_OK;
6219         vattr_t va;
6220 
6221 tryagain:
6222 
6223         /*
6224          * The file open mode used is VWRITE.  If the client needs
6225          * some other semantic, then it should do the access checking
6226          * itself.  It would have been nice to have the file open mode
6227          * passed as part of the arguments.
6228          */
6229 
6230         *created = TRUE;
6231         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6232 
6233         if (error) {
6234                 *created = FALSE;
6235 
6236                 /*
6237                  * If we got something other than file already exists
6238                  * then just return this error.  Otherwise, we got
6239                  * EEXIST.  If we were doing a GUARDED create, then
6240                  * just return this error.  Otherwise, we need to
6241                  * make sure that this wasn't a duplicate of an
6242                  * exclusive create request.
6243                  *
6244                  * The assumption is made that a non-exclusive create
6245                  * request will never return EEXIST.
6246                  */
6247 
6248                 if (error != EEXIST || mode == GUARDED4) {
6249                         status = puterrno4(error);
6250                         return (status);
6251                 }
6252                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6253                     NULL, NULL, NULL);
6254 
6255                 if (error) {
6256                         /*
6257                          * We couldn't find the file that we thought that
6258                          * we just created.  So, we'll just try creating
6259                          * it again.
6260                          */
6261                         if (error == ENOENT)
6262                                 goto tryagain;
6263 
6264                         status = puterrno4(error);
6265                         return (status);
6266                 }
6267 
6268                 if (mode == UNCHECKED4) {
6269                         /* existing object must be regular file */
6270                         if ((*vpp)->v_type != VREG) {
6271                                 if ((*vpp)->v_type == VDIR)
6272                                         status = NFS4ERR_ISDIR;
6273                                 else if ((*vpp)->v_type == VLNK)
6274                                         status = NFS4ERR_SYMLINK;
6275                                 else
6276                                         status = NFS4ERR_INVAL;
6277                                 VN_RELE(*vpp);
6278                                 return (status);
6279                         }
6280 
6281                         return (NFS4_OK);
6282                 }
6283 
6284                 /* Check for duplicate request */
6285                 va.va_mask = AT_MTIME;
6286                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6287                 if (!error) {
6288                         /* We found the file */
6289                         const timestruc_t *mtime = &vap->va_mtime;
6290 
6291                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6292                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6293                                 /* but its not our creation */
6294                                 VN_RELE(*vpp);
6295                                 return (NFS4ERR_EXIST);
6296                         }
6297                         *created = TRUE; /* retrans of create == created */
6298                         return (NFS4_OK);
6299                 }
6300                 VN_RELE(*vpp);
6301                 return (NFS4ERR_EXIST);
6302         }
6303 
6304         return (NFS4_OK);
6305 }
6306 
6307 static nfsstat4
6308 check_open_access(uint32_t access, struct compound_state *cs,
6309     struct svc_req *req)
6310 {
6311         int error;
6312         vnode_t *vp;
6313         bool_t readonly;
6314         cred_t *cr = cs->cr;
6315 
6316         /* For now we don't allow mandatory locking as per V2/V3 */
6317         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6318                 return (NFS4ERR_ACCESS);
6319         }
6320 
6321         vp = cs->vp;
6322         ASSERT(cr != NULL && vp->v_type == VREG);
6323 
6324         /*
6325          * If the file system is exported read only and we are trying
6326          * to open for write, then return NFS4ERR_ROFS
6327          */
6328 
6329         readonly = rdonly4(req, cs);
6330 
6331         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6332                 return (NFS4ERR_ROFS);
6333 
6334         if (access & OPEN4_SHARE_ACCESS_READ) {
6335                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6336                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6337                         return (NFS4ERR_ACCESS);
6338                 }
6339         }
6340 
6341         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6342                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6343                 if (error)
6344                         return (NFS4ERR_ACCESS);
6345         }
6346 
6347         return (NFS4_OK);
6348 }
6349 
6350 static nfsstat4
6351 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6352     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6353 {
6354         struct nfs4_svgetit_arg sarg;
6355         struct nfs4_ntov_table ntov;
6356 
6357         bool_t ntov_table_init = FALSE;
6358         struct statvfs64 sb;
6359         nfsstat4 status;
6360         vnode_t *vp;
6361         vattr_t bva, ava, iva, cva, *vap;
6362         vnode_t *dvp;
6363         timespec32_t *mtime;
6364         char *nm = NULL;
6365         uint_t buflen;
6366         bool_t created;
6367         bool_t setsize = FALSE;
6368         len_t reqsize;
6369         int error;
6370         bool_t trunc;
6371         caller_context_t ct;
6372         component4 *component;
6373         bslabel_t *clabel;
6374         struct sockaddr *ca;
6375         char *name = NULL;
6376 
6377         sarg.sbp = &sb;
6378         sarg.is_referral = B_FALSE;
6379 
6380         dvp = cs->vp;
6381 
6382         /* Check if the file system is read only */
6383         if (rdonly4(req, cs))
6384                 return (NFS4ERR_ROFS);
6385 
6386         /* check the label of including directory */
6387         if (is_system_labeled()) {
6388                 ASSERT(req->rq_label != NULL);
6389                 clabel = req->rq_label;
6390                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6391                     "got client label from request(1)",
6392                     struct svc_req *, req);
6393                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6394                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6395                             cs->exi)) {
6396                                 return (NFS4ERR_ACCESS);
6397                         }
6398                 }
6399         }
6400 
6401         /*
6402          * Get the last component of path name in nm. cs will reference
6403          * the including directory on success.
6404          */
6405         component = &args->open_claim4_u.file;
6406         status = utf8_dir_verify(component);
6407         if (status != NFS4_OK)
6408                 return (status);
6409 
6410         nm = utf8_to_fn(component, &buflen, NULL);
6411 
6412         if (nm == NULL)
6413                 return (NFS4ERR_RESOURCE);
6414 
6415         if (buflen > MAXNAMELEN) {
6416                 kmem_free(nm, buflen);
6417                 return (NFS4ERR_NAMETOOLONG);
6418         }
6419 
6420         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6421         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6422         if (error) {
6423                 kmem_free(nm, buflen);
6424                 return (puterrno4(error));
6425         }
6426 
6427         if (bva.va_type != VDIR) {
6428                 kmem_free(nm, buflen);
6429                 return (NFS4ERR_NOTDIR);
6430         }
6431 
6432         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6433 
6434         switch (args->mode) {
6435         case GUARDED4:
6436                 /*FALLTHROUGH*/
6437         case UNCHECKED4:
6438                 nfs4_ntov_table_init(&ntov);
6439                 ntov_table_init = TRUE;
6440 
6441                 *attrset = 0;
6442                 status = do_rfs4_set_attrs(attrset,
6443                     &args->createhow4_u.createattrs,
6444                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6445 
6446                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6447                     sarg.vap->va_type != VREG) {
6448                         if (sarg.vap->va_type == VDIR)
6449                                 status = NFS4ERR_ISDIR;
6450                         else if (sarg.vap->va_type == VLNK)
6451                                 status = NFS4ERR_SYMLINK;
6452                         else
6453                                 status = NFS4ERR_INVAL;
6454                 }
6455 
6456                 if (status != NFS4_OK) {
6457                         kmem_free(nm, buflen);
6458                         nfs4_ntov_table_free(&ntov, &sarg);
6459                         *attrset = 0;
6460                         return (status);
6461                 }
6462 
6463                 vap = sarg.vap;
6464                 vap->va_type = VREG;
6465                 vap->va_mask |= AT_TYPE;
6466 
6467                 if ((vap->va_mask & AT_MODE) == 0) {
6468                         vap->va_mask |= AT_MODE;
6469                         vap->va_mode = (mode_t)0600;
6470                 }
6471 
6472                 if (vap->va_mask & AT_SIZE) {
6473 
6474                         /* Disallow create with a non-zero size */
6475 
6476                         if ((reqsize = sarg.vap->va_size) != 0) {
6477                                 kmem_free(nm, buflen);
6478                                 nfs4_ntov_table_free(&ntov, &sarg);
6479                                 *attrset = 0;
6480                                 return (NFS4ERR_INVAL);
6481                         }
6482                         setsize = TRUE;
6483                 }
6484                 break;
6485 
6486         case EXCLUSIVE4:
6487                 /* prohibit EXCL create of named attributes */
6488                 if (dvp->v_flag & V_XATTRDIR) {
6489                         kmem_free(nm, buflen);
6490                         *attrset = 0;
6491                         return (NFS4ERR_INVAL);
6492                 }
6493 
6494                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6495                 cva.va_type = VREG;
6496                 /*
6497                  * Ensure no time overflows. Assumes underlying
6498                  * filesystem supports at least 32 bits.
6499                  * Truncate nsec to usec resolution to allow valid
6500                  * compares even if the underlying filesystem truncates.
6501                  */
6502                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6503                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6504                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6505                 cva.va_mode = (mode_t)0;
6506                 vap = &cva;
6507 
6508                 /*
6509                  * For EXCL create, attrset is set to the server attr
6510                  * used to cache the client's verifier.
6511                  */
6512                 *attrset = FATTR4_TIME_MODIFY_MASK;
6513                 break;
6514         }
6515 
6516         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6517         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6518             MAXPATHLEN  + 1);
6519 
6520         if (name == NULL) {
6521                 kmem_free(nm, buflen);
6522                 return (NFS4ERR_SERVERFAULT);
6523         }
6524 
6525         status = create_vnode(dvp, name, vap, args->mode,
6526             cs->cr, &vp, &created);
6527         if (nm != name)
6528                 kmem_free(name, MAXPATHLEN + 1);
6529         kmem_free(nm, buflen);
6530 
6531         if (status != NFS4_OK) {
6532                 if (ntov_table_init)
6533                         nfs4_ntov_table_free(&ntov, &sarg);
6534                 *attrset = 0;
6535                 return (status);
6536         }
6537 
6538         trunc = (setsize && !created);
6539 
6540         if (args->mode != EXCLUSIVE4) {
6541                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6542 
6543                 /*
6544                  * True verification that object was created with correct
6545                  * attrs is impossible.  The attrs could have been changed
6546                  * immediately after object creation.  If attributes did
6547                  * not verify, the only recourse for the server is to
6548                  * destroy the object.  Maybe if some attrs (like gid)
6549                  * are set incorrectly, the object should be destroyed;
6550                  * however, seems bad as a default policy.  Do we really
6551                  * want to destroy an object over one of the times not
6552                  * verifying correctly?  For these reasons, the server
6553                  * currently sets bits in attrset for createattrs
6554                  * that were set; however, no verification is done.
6555                  *
6556                  * vmask_to_nmask accounts for vattr bits set on create
6557                  *      [do_rfs4_set_attrs() only sets resp bits for
6558                  *       non-vattr/vfs bits.]
6559                  * Mask off any bits we set by default so as not to return
6560                  * more attrset bits than were requested in createattrs
6561                  */
6562                 if (created) {
6563                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6564                         *attrset &= createmask;
6565                 } else {
6566                         /*
6567                          * We did not create the vnode (we tried but it
6568                          * already existed).  In this case, the only createattr
6569                          * that the spec allows the server to set is size,
6570                          * and even then, it can only be set if it is 0.
6571                          */
6572                         *attrset = 0;
6573                         if (trunc)
6574                                 *attrset = FATTR4_SIZE_MASK;
6575                 }
6576         }
6577         if (ntov_table_init)
6578                 nfs4_ntov_table_free(&ntov, &sarg);
6579 
6580         /*
6581          * Get the initial "after" sequence number, if it fails,
6582          * set to zero, time to before.
6583          */
6584         iva.va_mask = AT_CTIME|AT_SEQ;
6585         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6586                 iva.va_seq = 0;
6587                 iva.va_ctime = bva.va_ctime;
6588         }
6589 
6590         /*
6591          * create_vnode attempts to create the file exclusive,
6592          * if it already exists the VOP_CREATE will fail and
6593          * may not increase va_seq. It is atomic if
6594          * we haven't changed the directory, but if it has changed
6595          * we don't know what changed it.
6596          */
6597         if (!created) {
6598                 if (bva.va_seq && iva.va_seq &&
6599                     bva.va_seq == iva.va_seq)
6600                         cinfo->atomic = TRUE;
6601                 else
6602                         cinfo->atomic = FALSE;
6603                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6604         } else {
6605                 /*
6606                  * The entry was created, we need to sync the
6607                  * directory metadata.
6608                  */
6609                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6610 
6611                 /*
6612                  * Get "after" change value, if it fails, simply return the
6613                  * before value.
6614                  */
6615                 ava.va_mask = AT_CTIME|AT_SEQ;
6616                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6617                         ava.va_ctime = bva.va_ctime;
6618                         ava.va_seq = 0;
6619                 }
6620 
6621                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6622 
6623                 /*
6624                  * The cinfo->atomic = TRUE only if we have
6625                  * non-zero va_seq's, and it has incremented by exactly one
6626                  * during the create_vnode and it didn't
6627                  * change during the VOP_FSYNC.
6628                  */
6629                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6630                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6631                         cinfo->atomic = TRUE;
6632                 else
6633                         cinfo->atomic = FALSE;
6634         }
6635 
6636         /* Check for mandatory locking and that the size gets set. */
6637         cva.va_mask = AT_MODE;
6638         if (setsize)
6639                 cva.va_mask |= AT_SIZE;
6640 
6641         /* Assume the worst */
6642         cs->mandlock = TRUE;
6643 
6644         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6645                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6646 
6647                 /*
6648                  * Truncate the file if necessary; this would be
6649                  * the case for create over an existing file.
6650                  */
6651 
6652                 if (trunc) {
6653                         int in_crit = 0;
6654                         rfs4_file_t *fp;
6655                         nfs4_srv_t *nsrv4;
6656                         bool_t create = FALSE;
6657 
6658                         /*
6659                          * We are writing over an existing file.
6660                          * Check to see if we need to recall a delegation.
6661                          */
6662                         nsrv4 = nfs4_get_srv();
6663                         rfs4_hold_deleg_policy(nsrv4);
6664                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6665                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6666                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6667                                         rfs4_file_rele(fp);
6668                                         rfs4_rele_deleg_policy(nsrv4);
6669                                         VN_RELE(vp);
6670                                         *attrset = 0;
6671                                         return (NFS4ERR_DELAY);
6672                                 }
6673                                 rfs4_file_rele(fp);
6674                         }
6675                         rfs4_rele_deleg_policy(nsrv4);
6676 
6677                         if (nbl_need_check(vp)) {
6678                                 in_crit = 1;
6679 
6680                                 ASSERT(reqsize == 0);
6681 
6682                                 nbl_start_crit(vp, RW_READER);
6683                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6684                                     cva.va_size, 0, NULL)) {
6685                                         in_crit = 0;
6686                                         nbl_end_crit(vp);
6687                                         VN_RELE(vp);
6688                                         *attrset = 0;
6689                                         return (NFS4ERR_ACCESS);
6690                                 }
6691                         }
6692                         ct.cc_sysid = 0;
6693                         ct.cc_pid = 0;
6694                         ct.cc_caller_id = nfs4_srv_caller_id;
6695                         ct.cc_flags = CC_DONTBLOCK;
6696 
6697                         cva.va_mask = AT_SIZE;
6698                         cva.va_size = reqsize;
6699                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6700                         if (in_crit)
6701                                 nbl_end_crit(vp);
6702                 }
6703         }
6704 
6705         error = makefh4(&cs->fh, vp, cs->exi);
6706 
6707         /*
6708          * Force modified data and metadata out to stable storage.
6709          */
6710         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6711 
6712         if (error) {
6713                 VN_RELE(vp);
6714                 *attrset = 0;
6715                 return (puterrno4(error));
6716         }
6717 
6718         /* if parent dir is attrdir, set namedattr fh flag */
6719         if (dvp->v_flag & V_XATTRDIR)
6720                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6721 
6722         if (cs->vp)
6723                 VN_RELE(cs->vp);
6724 
6725         cs->vp = vp;
6726 
6727         /*
6728          * if we did not create the file, we will need to check
6729          * the access bits on the file
6730          */
6731 
6732         if (!created) {
6733                 if (setsize)
6734                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6735                 status = check_open_access(args->share_access, cs, req);
6736                 if (status != NFS4_OK)
6737                         *attrset = 0;
6738         }
6739         return (status);
6740 }
6741 
6742 /*ARGSUSED*/
6743 static void
6744 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6745     rfs4_openowner_t *oo, delegreq_t deleg,
6746     uint32_t access, uint32_t deny,
6747     OPEN4res *resp, int deleg_cur)
6748 {
6749         /* XXX Currently not using req  */
6750         rfs4_state_t *sp;
6751         rfs4_file_t *fp;
6752         bool_t screate = TRUE;
6753         bool_t fcreate = TRUE;
6754         uint32_t open_a, share_a;
6755         uint32_t open_d, share_d;
6756         rfs4_deleg_state_t *dsp;
6757         sysid_t sysid;
6758         nfsstat4 status;
6759         caller_context_t ct;
6760         int fflags = 0;
6761         int recall = 0;
6762         int err;
6763         int first_open;
6764 
6765         /* get the file struct and hold a lock on it during initial open */
6766         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6767         if (fp == NULL) {
6768                 resp->status = NFS4ERR_RESOURCE;
6769                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6770                 return;
6771         }
6772 
6773         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6774         if (sp == NULL) {
6775                 resp->status = NFS4ERR_RESOURCE;
6776                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6777                 /* No need to keep any reference */
6778                 rw_exit(&fp->rf_file_rwlock);
6779                 rfs4_file_rele(fp);
6780                 return;
6781         }
6782 
6783         /* try to get the sysid before continuing */
6784         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6785                 resp->status = status;
6786                 rfs4_file_rele(fp);
6787                 /* Not a fully formed open; "close" it */
6788                 if (screate == TRUE)
6789                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6790                 rfs4_state_rele(sp);
6791                 return;
6792         }
6793 
6794         /* Calculate the fflags for this OPEN. */
6795         if (access & OPEN4_SHARE_ACCESS_READ)
6796                 fflags |= FREAD;
6797         if (access & OPEN4_SHARE_ACCESS_WRITE)
6798                 fflags |= FWRITE;
6799 
6800         rfs4_dbe_lock(sp->rs_dbe);
6801 
6802         /*
6803          * Calculate the new deny and access mode that this open is adding to
6804          * the file for this open owner;
6805          */
6806         open_d = (deny & ~sp->rs_open_deny);
6807         open_a = (access & ~sp->rs_open_access);
6808 
6809         /*
6810          * Calculate the new share access and share deny modes that this open
6811          * is adding to the file for this open owner;
6812          */
6813         share_a = (access & ~sp->rs_share_access);
6814         share_d = (deny & ~sp->rs_share_deny);
6815 
6816         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6817 
6818         /*
6819          * Check to see the client has already sent an open for this
6820          * open owner on this file with the same share/deny modes.
6821          * If so, we don't need to check for a conflict and we don't
6822          * need to add another shrlock.  If not, then we need to
6823          * check for conflicts in deny and access before checking for
6824          * conflicts in delegation.  We don't want to recall a
6825          * delegation based on an open that will eventually fail based
6826          * on shares modes.
6827          */
6828 
6829         if (share_a || share_d) {
6830                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6831                         rfs4_dbe_unlock(sp->rs_dbe);
6832                         resp->status = err;
6833 
6834                         rfs4_file_rele(fp);
6835                         /* Not a fully formed open; "close" it */
6836                         if (screate == TRUE)
6837                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6838                         rfs4_state_rele(sp);
6839                         return;
6840                 }
6841         }
6842 
6843         rfs4_dbe_lock(fp->rf_dbe);
6844 
6845         /*
6846          * Check to see if this file is delegated and if so, if a
6847          * recall needs to be done.
6848          */
6849         if (rfs4_check_recall(sp, access)) {
6850                 rfs4_dbe_unlock(fp->rf_dbe);
6851                 rfs4_dbe_unlock(sp->rs_dbe);
6852                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6853                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6854                 rfs4_dbe_lock(sp->rs_dbe);
6855 
6856                 /* if state closed while lock was dropped */
6857                 if (sp->rs_closed) {
6858                         if (share_a || share_d)
6859                                 (void) rfs4_unshare(sp);
6860                         rfs4_dbe_unlock(sp->rs_dbe);
6861                         rfs4_file_rele(fp);
6862                         /* Not a fully formed open; "close" it */
6863                         if (screate == TRUE)
6864                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6865                         rfs4_state_rele(sp);
6866                         resp->status = NFS4ERR_OLD_STATEID;
6867                         return;
6868                 }
6869 
6870                 rfs4_dbe_lock(fp->rf_dbe);
6871                 /* Let's see if the delegation was returned */
6872                 if (rfs4_check_recall(sp, access)) {
6873                         rfs4_dbe_unlock(fp->rf_dbe);
6874                         if (share_a || share_d)
6875                                 (void) rfs4_unshare(sp);
6876                         rfs4_dbe_unlock(sp->rs_dbe);
6877                         rfs4_file_rele(fp);
6878                         rfs4_update_lease(sp->rs_owner->ro_client);
6879 
6880                         /* Not a fully formed open; "close" it */
6881                         if (screate == TRUE)
6882                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6883                         rfs4_state_rele(sp);
6884                         resp->status = NFS4ERR_DELAY;
6885                         return;
6886                 }
6887         }
6888         /*
6889          * the share check passed and any delegation conflict has been
6890          * taken care of, now call vop_open.
6891          * if this is the first open then call vop_open with fflags.
6892          * if not, call vn_open_upgrade with just the upgrade flags.
6893          *
6894          * if the file has been opened already, it will have the current
6895          * access mode in the state struct.  if it has no share access, then
6896          * this is a new open.
6897          *
6898          * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6899          * call VOP_OPEN(), just do the open upgrade.
6900          */
6901         if (first_open && !deleg_cur) {
6902                 ct.cc_sysid = sysid;
6903                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6904                 ct.cc_caller_id = nfs4_srv_caller_id;
6905                 ct.cc_flags = CC_DONTBLOCK;
6906                 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6907                 if (err) {
6908                         rfs4_dbe_unlock(fp->rf_dbe);
6909                         if (share_a || share_d)
6910                                 (void) rfs4_unshare(sp);
6911                         rfs4_dbe_unlock(sp->rs_dbe);
6912                         rfs4_file_rele(fp);
6913 
6914                         /* Not a fully formed open; "close" it */
6915                         if (screate == TRUE)
6916                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6917                         rfs4_state_rele(sp);
6918                         /* check if a monitor detected a delegation conflict */
6919                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6920                                 resp->status = NFS4ERR_DELAY;
6921                         else
6922                                 resp->status = NFS4ERR_SERVERFAULT;
6923                         return;
6924                 }
6925         } else { /* open upgrade */
6926                 /*
6927                  * calculate the fflags for the new mode that is being added
6928                  * by this upgrade.
6929                  */
6930                 fflags = 0;
6931                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6932                         fflags |= FREAD;
6933                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6934                         fflags |= FWRITE;
6935                 vn_open_upgrade(cs->vp, fflags);
6936         }
6937         sp->rs_open_access |= access;
6938         sp->rs_open_deny |= deny;
6939 
6940         if (open_d & OPEN4_SHARE_DENY_READ)
6941                 fp->rf_deny_read++;
6942         if (open_d & OPEN4_SHARE_DENY_WRITE)
6943                 fp->rf_deny_write++;
6944         fp->rf_share_deny |= deny;
6945 
6946         if (open_a & OPEN4_SHARE_ACCESS_READ)
6947                 fp->rf_access_read++;
6948         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6949                 fp->rf_access_write++;
6950         fp->rf_share_access |= access;
6951 
6952         /*
6953          * Check for delegation here. if the deleg argument is not
6954          * DELEG_ANY, then this is a reclaim from a client and
6955          * we must honor the delegation requested. If necessary we can
6956          * set the recall flag.
6957          */
6958 
6959         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6960 
6961         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6962 
6963         next_stateid(&sp->rs_stateid);
6964 
6965         resp->stateid = sp->rs_stateid.stateid;
6966 
6967         rfs4_dbe_unlock(fp->rf_dbe);
6968         rfs4_dbe_unlock(sp->rs_dbe);
6969 
6970         if (dsp) {
6971                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6972                 rfs4_deleg_state_rele(dsp);
6973         }
6974 
6975         rfs4_file_rele(fp);
6976         rfs4_state_rele(sp);
6977 
6978         resp->status = NFS4_OK;
6979 }
6980 
6981 /*ARGSUSED*/
6982 static void
6983 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6984     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6985 {
6986         change_info4 *cinfo = &resp->cinfo;
6987         bitmap4 *attrset = &resp->attrset;
6988 
6989         if (args->opentype == OPEN4_NOCREATE)
6990                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6991                     req, cs, args->share_access, cinfo);
6992         else {
6993                 /* inhibit delegation grants during exclusive create */
6994 
6995                 if (args->mode == EXCLUSIVE4)
6996                         rfs4_disable_delegation();
6997 
6998                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6999                     oo->ro_client->rc_clientid);
7000         }
7001 
7002         if (resp->status == NFS4_OK) {
7003 
7004                 /* cs->vp cs->fh now reference the desired file */
7005 
7006                 rfs4_do_open(cs, req, oo,
7007                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7008                     args->share_access, args->share_deny, resp, 0);
7009 
7010                 /*
7011                  * If rfs4_createfile set attrset, we must
7012                  * clear this attrset before the response is copied.
7013                  */
7014                 if (resp->status != NFS4_OK && resp->attrset) {
7015                         resp->attrset = 0;
7016                 }
7017         }
7018         else
7019                 *cs->statusp = resp->status;
7020 
7021         if (args->mode == EXCLUSIVE4)
7022                 rfs4_enable_delegation();
7023 }
7024 
7025 /*ARGSUSED*/
7026 static void
7027 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7028     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7029 {
7030         change_info4 *cinfo = &resp->cinfo;
7031         vattr_t va;
7032         vtype_t v_type = cs->vp->v_type;
7033         int error = 0;
7034 
7035         /* Verify that we have a regular file */
7036         if (v_type != VREG) {
7037                 if (v_type == VDIR)
7038                         resp->status = NFS4ERR_ISDIR;
7039                 else if (v_type == VLNK)
7040                         resp->status = NFS4ERR_SYMLINK;
7041                 else
7042                         resp->status = NFS4ERR_INVAL;
7043                 return;
7044         }
7045 
7046         va.va_mask = AT_MODE|AT_UID;
7047         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7048         if (error) {
7049                 resp->status = puterrno4(error);
7050                 return;
7051         }
7052 
7053         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7054 
7055         /*
7056          * Check if we have access to the file, Note the the file
7057          * could have originally been open UNCHECKED or GUARDED
7058          * with mode bits that will now fail, but there is nothing
7059          * we can really do about that except in the case that the
7060          * owner of the file is the one requesting the open.
7061          */
7062         if (crgetuid(cs->cr) != va.va_uid) {
7063                 resp->status = check_open_access(args->share_access, cs, req);
7064                 if (resp->status != NFS4_OK) {
7065                         return;
7066                 }
7067         }
7068 
7069         /*
7070          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7071          */
7072         cinfo->before = 0;
7073         cinfo->after = 0;
7074         cinfo->atomic = FALSE;
7075 
7076         rfs4_do_open(cs, req, oo,
7077             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7078             args->share_access, args->share_deny, resp, 0);
7079 }
7080 
7081 static void
7082 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7083     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7084 {
7085         int error;
7086         nfsstat4 status;
7087         stateid4 stateid =
7088             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7089         rfs4_deleg_state_t *dsp;
7090 
7091         /*
7092          * Find the state info from the stateid and confirm that the
7093          * file is delegated.  If the state openowner is the same as
7094          * the supplied openowner we're done. If not, get the file
7095          * info from the found state info. Use that file info to
7096          * create the state for this lock owner. Note solaris doen't
7097          * really need the pathname to find the file. We may want to
7098          * lookup the pathname and make sure that the vp exist and
7099          * matches the vp in the file structure. However it is
7100          * possible that the pathname nolonger exists (local process
7101          * unlinks the file), so this may not be that useful.
7102          */
7103 
7104         status = rfs4_get_deleg_state(&stateid, &dsp);
7105         if (status != NFS4_OK) {
7106                 resp->status = status;
7107                 return;
7108         }
7109 
7110         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7111 
7112         /*
7113          * New lock owner, create state. Since this was probably called
7114          * in response to a CB_RECALL we set deleg to DELEG_NONE
7115          */
7116 
7117         ASSERT(cs->vp != NULL);
7118         VN_RELE(cs->vp);
7119         VN_HOLD(dsp->rds_finfo->rf_vp);
7120         cs->vp = dsp->rds_finfo->rf_vp;
7121 
7122         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7123                 rfs4_deleg_state_rele(dsp);
7124                 *cs->statusp = resp->status = puterrno4(error);
7125                 return;
7126         }
7127 
7128         /* Mark progress for delegation returns */
7129         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7130         rfs4_deleg_state_rele(dsp);
7131         rfs4_do_open(cs, req, oo, DELEG_NONE,
7132             args->share_access, args->share_deny, resp, 1);
7133 }
7134 
7135 /*ARGSUSED*/
7136 static void
7137 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7138     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7139 {
7140         /*
7141          * Lookup the pathname, it must already exist since this file
7142          * was delegated.
7143          *
7144          * Find the file and state info for this vp and open owner pair.
7145          *      check that they are in fact delegated.
7146          *      check that the state access and deny modes are the same.
7147          *
7148          * Return the delgation possibly seting the recall flag.
7149          */
7150         rfs4_file_t *fp;
7151         rfs4_state_t *sp;
7152         bool_t create = FALSE;
7153         bool_t dcreate = FALSE;
7154         rfs4_deleg_state_t *dsp;
7155         nfsace4 *ace;
7156 
7157         /* Note we ignore oflags */
7158         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7159             req, cs, args->share_access, &resp->cinfo);
7160 
7161         if (resp->status != NFS4_OK) {
7162                 return;
7163         }
7164 
7165         /* get the file struct and hold a lock on it during initial open */
7166         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7167         if (fp == NULL) {
7168                 resp->status = NFS4ERR_RESOURCE;
7169                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7170                 return;
7171         }
7172 
7173         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7174         if (sp == NULL) {
7175                 resp->status = NFS4ERR_SERVERFAULT;
7176                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7177                 rw_exit(&fp->rf_file_rwlock);
7178                 rfs4_file_rele(fp);
7179                 return;
7180         }
7181 
7182         rfs4_dbe_lock(sp->rs_dbe);
7183         rfs4_dbe_lock(fp->rf_dbe);
7184         if (args->share_access != sp->rs_share_access ||
7185             args->share_deny != sp->rs_share_deny ||
7186             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7187                 NFS4_DEBUG(rfs4_debug,
7188                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7189                 rfs4_dbe_unlock(fp->rf_dbe);
7190                 rfs4_dbe_unlock(sp->rs_dbe);
7191                 rfs4_file_rele(fp);
7192                 rfs4_state_rele(sp);
7193                 resp->status = NFS4ERR_SERVERFAULT;
7194                 return;
7195         }
7196         rfs4_dbe_unlock(fp->rf_dbe);
7197         rfs4_dbe_unlock(sp->rs_dbe);
7198 
7199         dsp = rfs4_finddeleg(sp, &dcreate);
7200         if (dsp == NULL) {
7201                 rfs4_state_rele(sp);
7202                 rfs4_file_rele(fp);
7203                 resp->status = NFS4ERR_SERVERFAULT;
7204                 return;
7205         }
7206 
7207         next_stateid(&sp->rs_stateid);
7208 
7209         resp->stateid = sp->rs_stateid.stateid;
7210 
7211         resp->delegation.delegation_type = dsp->rds_dtype;
7212 
7213         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7214                 open_read_delegation4 *rv =
7215                     &resp->delegation.open_delegation4_u.read;
7216 
7217                 rv->stateid = dsp->rds_delegid.stateid;
7218                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7219                 ace = &rv->permissions;
7220         } else {
7221                 open_write_delegation4 *rv =
7222                     &resp->delegation.open_delegation4_u.write;
7223 
7224                 rv->stateid = dsp->rds_delegid.stateid;
7225                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7226                 ace = &rv->permissions;
7227                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7228                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7229         }
7230 
7231         /* XXX For now */
7232         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7233         ace->flag = 0;
7234         ace->access_mask = 0;
7235         ace->who.utf8string_len = 0;
7236         ace->who.utf8string_val = 0;
7237 
7238         rfs4_deleg_state_rele(dsp);
7239         rfs4_state_rele(sp);
7240         rfs4_file_rele(fp);
7241 }
7242 
/*
 * Verdict of a sequence id check, as produced by rfs4_check_seqid().
 */
typedef enum {
        /* the request carries the next expected sequence id */
        NFS4_CHKSEQ_OKAY = 0,
        /* same sequence id and same operation as the saved reply */
        NFS4_CHKSEQ_REPLAY = 1,
        /* any other sequence id, or a replayed id with another operation */
        NFS4_CHKSEQ_BAD = 2
} rfs4_chkseq_t;
7248 
7249 /*
7250  * Generic function for sequence number checks.
7251  */
7252 static rfs4_chkseq_t
7253 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7254     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7255 {
7256         /* Same sequence ids and matching operations? */
7257         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7258                 if (copyres == TRUE) {
7259                         rfs4_free_reply(resop);
7260                         rfs4_copy_reply(resop, lastop);
7261                 }
7262                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7263                     "Replayed SEQID %d\n", seqid));
7264                 return (NFS4_CHKSEQ_REPLAY);
7265         }
7266 
7267         /* If the incoming sequence is not the next expected then it is bad */
7268         if (rqst_seq != seqid + 1) {
7269                 if (rqst_seq == seqid) {
7270                         NFS4_DEBUG(rfs4_debug,
7271                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7272                             "but last op was %d current op is %d\n",
7273                             lastop->resop, resop->resop));
7274                         return (NFS4_CHKSEQ_BAD);
7275                 }
7276                 NFS4_DEBUG(rfs4_debug,
7277                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7278                     rqst_seq, seqid));
7279                 return (NFS4_CHKSEQ_BAD);
7280         }
7281 
7282         /* Everything okay -- next expected */
7283         return (NFS4_CHKSEQ_OKAY);
7284 }
7285 
7286 
7287 static rfs4_chkseq_t
7288 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7289 {
7290         rfs4_chkseq_t rc;
7291 
7292         rfs4_dbe_lock(op->ro_dbe);
7293         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7294             TRUE);
7295         rfs4_dbe_unlock(op->ro_dbe);
7296 
7297         if (rc == NFS4_CHKSEQ_OKAY)
7298                 rfs4_update_lease(op->ro_client);
7299 
7300         return (rc);
7301 }
7302 
7303 static rfs4_chkseq_t
7304 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7305 {
7306         rfs4_chkseq_t rc;
7307 
7308         rfs4_dbe_lock(op->ro_dbe);
7309         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7310             olo_seqid, resop, FALSE);
7311         rfs4_dbe_unlock(op->ro_dbe);
7312 
7313         return (rc);
7314 }
7315 
7316 static rfs4_chkseq_t
7317 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7318 {
7319         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7320 
7321         rfs4_dbe_lock(lsp->rls_dbe);
7322         if (!lsp->rls_skip_seqid_check)
7323                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7324                     resop, TRUE);
7325         rfs4_dbe_unlock(lsp->rls_dbe);
7326 
7327         return (rc);
7328 }
7329 
/*
 * rfs4_op_open: server-side handler for the NFSv4 OPEN operation.
 *
 *   argop - operation arguments; the OPEN4args member is used
 *   resop - operation result; the OPEN4res member is filled in
 *   req   - RPC request, handed through to the per-claim helpers
 *   cs    - compound state; cs->vp must be set on entry and
 *           *cs->statusp receives the final status
 *
 * Processing order:
 *   1. look up the client by clientid and reject an expired lease;
 *   2. find (or create) the open_owner and serialize on its ro_sw;
 *   3. for a pre-existing owner, validate the open seqid, detecting
 *      replays and restarting if the owner was never confirmed;
 *   4. apply the grace-period rules for the claim type;
 *   5. validate the share access/deny bits;
 *   6. dispatch on the claim type to one of the rfs4_do_open*() helpers;
 *   7. based on the resulting status, update the lease, the cached
 *      reply and the open sequence id.
 *
 * Exit labels, from outermost to innermost state held:
 *   out    - client hold (cp) and open_owner hold (oo) are both held and
 *            oo->ro_sw has been entered
 *   finish - client hold already dropped; oo still held with ro_sw entered
 *   end    - nothing held; only the "done" probe remains to be fired
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
        OPEN4args *args = &argop->nfs_argop4_u.opopen;
        OPEN4res *resp = &resop->nfs_resop4_u.opopen;
        open_owner4 *owner = &args->owner;
        open_claim_type4 claim = args->claim;
        rfs4_client_t *cp;
        rfs4_openowner_t *oo;
        bool_t create;          /* in: allow creation; out: was created */
        bool_t replay = FALSE;  /* TRUE when seqid check reports a replay */
        int can_reclaim;

        DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
            OPEN4args *, args);

        /* OPEN requires a current filehandle */
        if (cs->vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                goto end;
        }

        /*
         * Need to check clientid and lease expiration first based on
         * error ordering and incrementing sequence id.
         */
        cp = rfs4_findclient_by_id(owner->clientid, FALSE);
        if (cp == NULL) {
                *cs->statusp = resp->status =
                    rfs4_check_clientid(&owner->clientid, 0);
                goto end;
        }

        if (rfs4_lease_expired(cp)) {
                /*
                 * NOTE(review): no rfs4_client_rele() on this path;
                 * presumably rfs4_client_close() consumes the hold taken
                 * by the lookup above -- confirm.
                 */
                rfs4_client_close(cp);
                *cs->statusp = resp->status = NFS4ERR_EXPIRED;
                goto end;
        }
        /* Snapshot the reclaim eligibility for the grace checks below */
        can_reclaim = cp->rc_can_reclaim;

        /*
         * Find the open_owner for use from this point forward.  Take
         * care in updating the sequence id based on the type of error
         * being returned.
         */
retry:
        /*
         * "create" is reset on every pass: it asks for creation on the
         * way in and is tested afterwards to tell whether the owner
         * already existed.
         */
        create = TRUE;
        oo = rfs4_findopenowner(owner, &create, args->seqid);
        if (oo == NULL) {
                *cs->statusp = resp->status = NFS4ERR_RESOURCE;
                rfs4_client_rele(cp);
                goto end;
        }

        /* Hold off access to the sequence space while the open is done */
        rfs4_sw_enter(&oo->ro_sw);

        /*
         * If the open_owner existed before at the server, then check
         * the sequence id.  The check is skipped while confirmation has
         * been postponed (see the ro_postpone_confirm handling below).
         */
        if (!create && !oo->ro_postpone_confirm) {
                switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
                case NFS4_CHKSEQ_BAD:
                        /*
                         * A seqid ahead of ours on an owner that was never
                         * confirmed: discard the owner's opens and start
                         * over with a fresh lookup.
                         */
                        if ((args->seqid > oo->ro_open_seqid) &&
                            oo->ro_need_confirm) {
                                rfs4_free_opens(oo, TRUE, FALSE);
                                rfs4_sw_exit(&oo->ro_sw);
                                rfs4_openowner_rele(oo);
                                goto retry;
                        }
                        /* *cs->statusp is set from resp->status at finish */
                        resp->status = NFS4ERR_BAD_SEQID;
                        goto out;
                case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
                        replay = TRUE;
                        goto out;
                default:
                        break;
                }

                /*
                 * Sequence was ok and open owner exists
                 * check to see if we have yet to see an
                 * open_confirm.
                 */
                if (oo->ro_need_confirm) {
                        rfs4_free_opens(oo, TRUE, FALSE);
                        rfs4_sw_exit(&oo->ro_sw);
                        rfs4_openowner_rele(oo);
                        goto retry;
                }
        }
        /* Grace only applies to regular-type OPENs */
        if (rfs4_clnt_in_grace(cp) &&
            (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
                *cs->statusp = resp->status = NFS4ERR_GRACE;
                goto out;
        }

        /*
         * If previous state at the server existed then can_reclaim
         * will be set. If not reply NFS4ERR_NO_GRACE to the
         * client.
         */
        if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }


        /*
         * Reject the open if the client has missed the grace period
         */
        if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }

        /* Couple of up-front bookkeeping items */
        if (oo->ro_need_confirm) {
                /*
                 * If this is a reclaim OPEN then we should not ask
                 * for a confirmation of the open_owner per the
                 * protocol specification.
                 */
                if (claim == CLAIM_PREVIOUS)
                        oo->ro_need_confirm = FALSE;
                else
                        resp->rflags |= OPEN4_RESULT_CONFIRM;
        }
        resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

        /*
         * If there is an unshared filesystem mounted on this vnode,
         * do not allow to open/create in this directory.
         */
        if (vn_ismntpt(cs->vp)) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        /*
         * access must READ, WRITE, or BOTH.  No access is invalid.
         * deny can be READ, WRITE, BOTH, or NONE.
         * bits not defined for access/deny are invalid.
         */
        if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
                *cs->statusp = resp->status = NFS4ERR_INVAL;
                goto out;
        }


        /*
         * make sure attrset is zero before response is built.
         */
        resp->attrset = 0;

        /* Per-claim helpers record their outcome in resp->status */
        switch (claim) {
        case CLAIM_NULL:
                rfs4_do_opennull(cs, req, args, oo, resp);
                break;
        case CLAIM_PREVIOUS:
                rfs4_do_openprev(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_CUR:
                rfs4_do_opendelcur(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_PREV:
                rfs4_do_opendelprev(cs, req, args, oo, resp);
                break;
        default:
                resp->status = NFS4ERR_INVAL;
                break;
        }

out:
        /* The client is reached through oo->ro_client from here on */
        rfs4_client_rele(cp);

        /* Catch sequence id handling here to make it a little easier */
        switch (resp->status) {
        case NFS4ERR_BADXDR:
        case NFS4ERR_BAD_SEQID:
        case NFS4ERR_BAD_STATEID:
        case NFS4ERR_NOFILEHANDLE:
        case NFS4ERR_RESOURCE:
        case NFS4ERR_STALE_CLIENTID:
        case NFS4ERR_STALE_STATEID:
                /*
                 * The protocol states that if any of these errors are
                 * being returned, the sequence id should not be
                 * incremented.  Any other return requires an
                 * increment.
                 */
                break;
        default:
                /* Always update the lease in this case */
                rfs4_update_lease(oo->ro_client);

                /* Regular response - copy the result */
                if (!replay)
                        rfs4_update_open_resp(oo, resop, &cs->fh);

                /*
                 * REPLAY case: Only if the previous response was OK
                 * do we copy the filehandle.  If not OK, no
                 * filehandle to copy.
                 */
                if (replay == TRUE &&
                    resp->status == NFS4_OK &&
                    oo->ro_reply_fh.nfs_fh4_val) {
                        /*
                         * If this is a replay, we must restore the
                         * current filehandle/vp to that of what was
                         * returned originally.  Try our best to do
                         * it.
                         */
                        nfs_fh4_fmt_t *fh_fmtp =
                            (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

                        cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
                            (fid_t *)&fh_fmtp->fh4_xlen, NULL);

                        /* The saved filehandle no longer maps to an export */
                        if (cs->exi == NULL) {
                                resp->status = NFS4ERR_STALE;
                                goto finish;
                        }

                        /* Swap the current vnode for the one in the reply */
                        VN_RELE(cs->vp);

                        cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
                            &resp->status);

                        /* nfs4_fhtovp() was handed &resp->status above */
                        if (cs->vp == NULL)
                                goto finish;

                        nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
                }

                /*
                 * If this was a replay, no need to update the
                 * sequence id. If the open_owner was not created on
                 * this pass, then update.  The first use of an
                 * open_owner will not bump the sequence id.
                 */
                if (replay == FALSE && !create)
                        rfs4_update_open_sequence(oo);
                /*
                 * If the client is receiving an error and the
                 * open_owner needs to be confirmed, there is no way
                 * to notify the client of this fact ignoring the fact
                 * that the server has no method of returning a
                 * stateid to confirm.  Therefore, the server needs to
                 * mark this open_owner in a way as to avoid the
                 * sequence id checking the next time the client uses
                 * this open_owner.
                 */
                if (resp->status != NFS4_OK && oo->ro_need_confirm)
                        oo->ro_postpone_confirm = TRUE;
                /*
                 * If OK response then clear the postpone flag and
                 * reset the sequence id to keep in sync with the
                 * client.
                 */
                if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
                        oo->ro_postpone_confirm = FALSE;
                        oo->ro_open_seqid = args->seqid;
                }
                break;
        }

finish:
        /* Publish the final status to the compound */
        *cs->statusp = resp->status;

        rfs4_sw_exit(&oo->ro_sw);
        rfs4_openowner_rele(oo);

end:
        DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
            OPEN4res *, resp);
}
7612 
7613 /*ARGSUSED*/
7614 void
7615 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7616     struct svc_req *req, struct compound_state *cs)
7617 {
7618         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7619         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7620         rfs4_state_t *sp;
7621         nfsstat4 status;
7622 
7623         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7624             OPEN_CONFIRM4args *, args);
7625 
7626         if (cs->vp == NULL) {
7627                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7628                 goto out;
7629         }
7630 
7631         if (cs->vp->v_type != VREG) {
7632                 *cs->statusp = resp->status =
7633                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7634                 return;
7635         }
7636 
7637         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7638         if (status != NFS4_OK) {
7639                 *cs->statusp = resp->status = status;
7640                 goto out;
7641         }
7642 
7643         /* Ensure specified filehandle matches */
7644         if (cs->vp != sp->rs_finfo->rf_vp) {
7645                 rfs4_state_rele(sp);
7646                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7647                 goto out;
7648         }
7649 
7650         /* hold off other access to open_owner while we tinker */
7651         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7652 
7653         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7654         case NFS4_CHECK_STATEID_OKAY:
7655                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7656                     resop) != 0) {
7657                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7658                         break;
7659                 }
7660                 /*
7661                  * If it is the appropriate stateid and determined to
7662                  * be "OKAY" then this means that the stateid does not
7663                  * need to be confirmed and the client is in error for
7664                  * sending an OPEN_CONFIRM.
7665                  */
7666                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7667                 break;
7668         case NFS4_CHECK_STATEID_OLD:
7669                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7670                 break;
7671         case NFS4_CHECK_STATEID_BAD:
7672                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7673                 break;
7674         case NFS4_CHECK_STATEID_EXPIRED:
7675                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7676                 break;
7677         case NFS4_CHECK_STATEID_CLOSED:
7678                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7679                 break;
7680         case NFS4_CHECK_STATEID_REPLAY:
7681                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7682                     resop)) {
7683                 case NFS4_CHKSEQ_OKAY:
7684                         /*
7685                          * This is replayed stateid; if seqid matches
7686                          * next expected, then client is using wrong seqid.
7687                          */
7688                         /* fall through */
7689                 case NFS4_CHKSEQ_BAD:
7690                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7691                         break;
7692                 case NFS4_CHKSEQ_REPLAY:
7693                         /*
7694                          * Note this case is the duplicate case so
7695                          * resp->status is already set.
7696                          */
7697                         *cs->statusp = resp->status;
7698                         rfs4_update_lease(sp->rs_owner->ro_client);
7699                         break;
7700                 }
7701                 break;
7702         case NFS4_CHECK_STATEID_UNCONFIRMED:
7703                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7704                     resop) != NFS4_CHKSEQ_OKAY) {
7705                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7706                         break;
7707                 }
7708                 *cs->statusp = resp->status = NFS4_OK;
7709 
7710                 next_stateid(&sp->rs_stateid);
7711                 resp->open_stateid = sp->rs_stateid.stateid;
7712                 sp->rs_owner->ro_need_confirm = FALSE;
7713                 rfs4_update_lease(sp->rs_owner->ro_client);
7714                 rfs4_update_open_sequence(sp->rs_owner);
7715                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7716                 break;
7717         default:
7718                 ASSERT(FALSE);
7719                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7720                 break;
7721         }
7722         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7723         rfs4_state_rele(sp);
7724 
7725 out:
7726         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7727             OPEN_CONFIRM4res *, resp);
7728 }
7729 
7730 /*ARGSUSED*/
7731 void
7732 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7733     struct svc_req *req, struct compound_state *cs)
7734 {
7735         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7736         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7737         uint32_t access = args->share_access;
7738         uint32_t deny = args->share_deny;
7739         nfsstat4 status;
7740         rfs4_state_t *sp;
7741         rfs4_file_t *fp;
7742         int fflags = 0;
7743 
7744         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7745             OPEN_DOWNGRADE4args *, args);
7746 
7747         if (cs->vp == NULL) {
7748                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7749                 goto out;
7750         }
7751 
7752         if (cs->vp->v_type != VREG) {
7753                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7754                 return;
7755         }
7756 
7757         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7758         if (status != NFS4_OK) {
7759                 *cs->statusp = resp->status = status;
7760                 goto out;
7761         }
7762 
7763         /* Ensure specified filehandle matches */
7764         if (cs->vp != sp->rs_finfo->rf_vp) {
7765                 rfs4_state_rele(sp);
7766                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7767                 goto out;
7768         }
7769 
7770         /* hold off other access to open_owner while we tinker */
7771         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7772 
7773         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7774         case NFS4_CHECK_STATEID_OKAY:
7775                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7776                     resop) != NFS4_CHKSEQ_OKAY) {
7777                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7778                         goto end;
7779                 }
7780                 break;
7781         case NFS4_CHECK_STATEID_OLD:
7782                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7783                 goto end;
7784         case NFS4_CHECK_STATEID_BAD:
7785                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7786                 goto end;
7787         case NFS4_CHECK_STATEID_EXPIRED:
7788                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7789                 goto end;
7790         case NFS4_CHECK_STATEID_CLOSED:
7791                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7792                 goto end;
7793         case NFS4_CHECK_STATEID_UNCONFIRMED:
7794                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7795                 goto end;
7796         case NFS4_CHECK_STATEID_REPLAY:
7797                 /* Check the sequence id for the open owner */
7798                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7799                     resop)) {
7800                 case NFS4_CHKSEQ_OKAY:
7801                         /*
7802                          * This is replayed stateid; if seqid matches
7803                          * next expected, then client is using wrong seqid.
7804                          */
7805                         /* fall through */
7806                 case NFS4_CHKSEQ_BAD:
7807                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7808                         goto end;
7809                 case NFS4_CHKSEQ_REPLAY:
7810                         /*
7811                          * Note this case is the duplicate case so
7812                          * resp->status is already set.
7813                          */
7814                         *cs->statusp = resp->status;
7815                         rfs4_update_lease(sp->rs_owner->ro_client);
7816                         goto end;
7817                 }
7818                 break;
7819         default:
7820                 ASSERT(FALSE);
7821                 break;
7822         }
7823 
7824         rfs4_dbe_lock(sp->rs_dbe);
7825         /*
7826          * Check that the new access modes and deny modes are valid.
7827          * Check that no invalid bits are set.
7828          */
7829         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7830             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7831                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7832                 rfs4_update_open_sequence(sp->rs_owner);
7833                 rfs4_dbe_unlock(sp->rs_dbe);
7834                 goto end;
7835         }
7836 
7837         /*
7838          * The new modes must be a subset of the current modes and
7839          * the access must specify at least one mode. To test that
7840          * the new mode is a subset of the current modes we bitwise
7841          * AND them together and check that the result equals the new
7842          * mode. For example:
7843          * New mode, access == R and current mode, sp->rs_open_access  == RW
7844          * access & sp->rs_open_access == R == access, so the new access mode
7845          * is valid. Consider access == RW, sp->rs_open_access = R
7846          * access & sp->rs_open_access == R != access, so the new access mode
7847          * is invalid.
7848          */
7849         if ((access & sp->rs_open_access) != access ||
7850             (deny & sp->rs_open_deny) != deny ||
7851             (access &
7852             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7853                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7854                 rfs4_update_open_sequence(sp->rs_owner);
7855                 rfs4_dbe_unlock(sp->rs_dbe);
7856                 goto end;
7857         }
7858 
7859         /*
7860          * Release any share locks associated with this stateID.
7861          * Strictly speaking, this violates the spec because the
7862          * spec effectively requires that open downgrade be atomic.
7863          * At present, fs_shrlock does not have this capability.
7864          */
7865         (void) rfs4_unshare(sp);
7866 
7867         status = rfs4_share(sp, access, deny);
7868         if (status != NFS4_OK) {
7869                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7870                 rfs4_update_open_sequence(sp->rs_owner);
7871                 rfs4_dbe_unlock(sp->rs_dbe);
7872                 goto end;
7873         }
7874 
7875         fp = sp->rs_finfo;
7876         rfs4_dbe_lock(fp->rf_dbe);
7877 
7878         /*
7879          * If the current mode has deny read and the new mode
7880          * does not, decrement the number of deny read mode bits
7881          * and if it goes to zero turn off the deny read bit
7882          * on the file.
7883          */
7884         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7885             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7886                 fp->rf_deny_read--;
7887                 if (fp->rf_deny_read == 0)
7888                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7889         }
7890 
7891         /*
7892          * If the current mode has deny write and the new mode
7893          * does not, decrement the number of deny write mode bits
7894          * and if it goes to zero turn off the deny write bit
7895          * on the file.
7896          */
7897         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7898             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7899                 fp->rf_deny_write--;
7900                 if (fp->rf_deny_write == 0)
7901                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7902         }
7903 
7904         /*
7905          * If the current mode has access read and the new mode
7906          * does not, decrement the number of access read mode bits
7907          * and if it goes to zero turn off the access read bit
7908          * on the file.  set fflags to FREAD for the call to
7909          * vn_open_downgrade().
7910          */
7911         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7912             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7913                 fp->rf_access_read--;
7914                 if (fp->rf_access_read == 0)
7915                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7916                 fflags |= FREAD;
7917         }
7918 
7919         /*
7920          * If the current mode has access write and the new mode
7921          * does not, decrement the number of access write mode bits
7922          * and if it goes to zero turn off the access write bit
7923          * on the file.  set fflags to FWRITE for the call to
7924          * vn_open_downgrade().
7925          */
7926         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7927             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7928                 fp->rf_access_write--;
7929                 if (fp->rf_access_write == 0)
7930                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7931                 fflags |= FWRITE;
7932         }
7933 
7934         /* Check that the file is still accessible */
7935         ASSERT(fp->rf_share_access);
7936 
7937         rfs4_dbe_unlock(fp->rf_dbe);
7938 
7939         /* now set the new open access and deny modes */
7940         sp->rs_open_access = access;
7941         sp->rs_open_deny = deny;
7942 
7943         /*
7944          * we successfully downgraded the share lock, now we need to downgrade
7945          * the open. it is possible that the downgrade was only for a deny
7946          * mode and we have nothing else to do.
7947          */
7948         if ((fflags & (FREAD|FWRITE)) != 0)
7949                 vn_open_downgrade(cs->vp, fflags);
7950 
7951         /* Update the stateid */
7952         next_stateid(&sp->rs_stateid);
7953         resp->open_stateid = sp->rs_stateid.stateid;
7954 
7955         rfs4_dbe_unlock(sp->rs_dbe);
7956 
7957         *cs->statusp = resp->status = NFS4_OK;
7958         /* Update the lease */
7959         rfs4_update_lease(sp->rs_owner->ro_client);
7960         /* And the sequence */
7961         rfs4_update_open_sequence(sp->rs_owner);
7962         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7963 
7964 end:
7965         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7966         rfs4_state_rele(sp);
7967 out:
7968         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7969             OPEN_DOWNGRADE4res *, resp);
7970 }
7971 
/*
 * memstr: locate the NUL-terminated string s2 inside the first n bytes
 * of the (not necessarily terminated) buffer s1.
 *
 * Returns a pointer to the first occurrence, or NULL when s2 does not
 * fit in or does not occur within those n bytes.  An empty s2 matches
 * at the start of the buffer.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t patlen = strlen(s2);
        char *cur = (char *)s1;
        size_t remaining;

        /* Slide one byte at a time while a full pattern can still fit */
        for (remaining = n; remaining >= patlen; remaining--, cur++) {
                if (bcmp(cur, s2, patlen) == 0)
                        return (cur);
        }

        return (NULL);
}
7987 
7988 /*
7989  * The logic behind this function is detailed in the NFSv4 RFC in the
7990  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7991  * that section for explicit guidance to server behavior for
7992  * SETCLIENTID.
7993  */
7994 void
7995 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7996     struct svc_req *req, struct compound_state *cs)
7997 {
7998         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7999         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8000         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8001         rfs4_clntip_t *ci;
8002         bool_t create;
8003         char *addr, *netid;
8004         int len;
8005 
8006         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8007             SETCLIENTID4args *, args);
8008 retry:
8009         newcp = cp_confirmed = cp_unconfirmed = NULL;
8010 
8011         /*
8012          * Save the caller's IP address
8013          */
8014         args->client.cl_addr =
8015             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8016 
8017         /*
8018          * Record if it is a Solaris client that cannot handle referrals.
8019          */
8020         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8021             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8022                 /* Add a "yes, it's downrev" record */
8023                 create = TRUE;
8024                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8025                 ASSERT(ci != NULL);
8026                 rfs4_dbe_rele(ci->ri_dbe);
8027         } else {
8028                 /* Remove any previous record */
8029                 rfs4_invalidate_clntip(args->client.cl_addr);
8030         }
8031 
8032         /*
8033          * In search of an EXISTING client matching the incoming
8034          * request to establish a new client identifier at the server
8035          */
8036         create = TRUE;
8037         cp = rfs4_findclient(&args->client, &create, NULL);
8038 
8039         /* Should never happen */
8040         ASSERT(cp != NULL);
8041 
8042         if (cp == NULL) {
8043                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8044                 goto out;
8045         }
8046 
8047         /*
8048          * Easiest case. Client identifier is newly created and is
8049          * unconfirmed.  Also note that for this case, no other
8050          * entries exist for the client identifier.  Nothing else to
8051          * check.  Just setup the response and respond.
8052          */
8053         if (create) {
8054                 *cs->statusp = res->status = NFS4_OK;
8055                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8056                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8057                     cp->rc_confirm_verf;
8058                 /* Setup callback information; CB_NULL confirmation later */
8059                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8060 
8061                 rfs4_client_rele(cp);
8062                 goto out;
8063         }
8064 
8065         /*
8066          * An existing, confirmed client may exist but it may not have
8067          * been active for at least one lease period.  If so, then
8068          * "close" the client and create a new client identifier
8069          */
8070         if (rfs4_lease_expired(cp)) {
8071                 rfs4_client_close(cp);
8072                 goto retry;
8073         }
8074 
8075         if (cp->rc_need_confirm == TRUE)
8076                 cp_unconfirmed = cp;
8077         else
8078                 cp_confirmed = cp;
8079 
8080         cp = NULL;
8081 
        /*
         * We have a confirmed client, now check for an
         * unconfirmed entry
         */
8086         if (cp_confirmed) {
8087                 /* If creds don't match then client identifier is inuse */
8088                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8089                         rfs4_cbinfo_t *cbp;
                        /*
                         * Someone else has established this client
                         * id. Try and say who they are. We will use
                         * the callback address supplied by the
                         * first client.
                         */
8096                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8097 
8098                         addr = netid = NULL;
8099 
8100                         cbp = &cp_confirmed->rc_cbinfo;
8101                         if (cbp->cb_callback.cb_location.r_addr &&
8102                             cbp->cb_callback.cb_location.r_netid) {
8103                                 cb_client4 *cbcp = &cbp->cb_callback;
8104 
8105                                 len = strlen(cbcp->cb_location.r_addr)+1;
8106                                 addr = kmem_alloc(len, KM_SLEEP);
8107                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8108                                 len = strlen(cbcp->cb_location.r_netid)+1;
8109                                 netid = kmem_alloc(len, KM_SLEEP);
8110                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8111                         }
8112 
8113                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8114                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8115 
8116                         rfs4_client_rele(cp_confirmed);
8117                 }
8118 
8119                 /*
8120                  * Confirmed, creds match, and verifier matches; must
8121                  * be an update of the callback info
8122                  */
8123                 if (cp_confirmed->rc_nfs_client.verifier ==
8124                     args->client.verifier) {
8125                         /* Setup callback information */
8126                         rfs4_client_setcb(cp_confirmed, &args->callback,
8127                             args->callback_ident);
8128 
8129                         /* everything okay -- move ahead */
8130                         *cs->statusp = res->status = NFS4_OK;
8131                         res->SETCLIENTID4res_u.resok4.clientid =
8132                             cp_confirmed->rc_clientid;
8133 
8134                         /* update the confirm_verifier and return it */
8135                         rfs4_client_scv_next(cp_confirmed);
8136                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8137                             cp_confirmed->rc_confirm_verf;
8138 
8139                         rfs4_client_rele(cp_confirmed);
8140                         goto out;
8141                 }
8142 
8143                 /*
8144                  * Creds match but the verifier doesn't.  Must search
8145                  * for an unconfirmed client that would be replaced by
8146                  * this request.
8147                  */
8148                 create = FALSE;
8149                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8150                     cp_confirmed);
8151         }
8152 
8153         /*
8154          * At this point, we have taken care of the brand new client
8155          * struct, INUSE case, update of an existing, and confirmed
8156          * client struct.
8157          */
8158 
8159         /*
8160          * check to see if things have changed while we originally
8161          * picked up the client struct.  If they have, then return and
8162          * retry the processing of this SETCLIENTID request.
8163          */
8164         if (cp_unconfirmed) {
8165                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8166                 if (!cp_unconfirmed->rc_need_confirm) {
8167                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8168                         rfs4_client_rele(cp_unconfirmed);
8169                         if (cp_confirmed)
8170                                 rfs4_client_rele(cp_confirmed);
8171                         goto retry;
8172                 }
8173                 /* do away with the old unconfirmed one */
8174                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8175                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8176                 rfs4_client_rele(cp_unconfirmed);
8177                 cp_unconfirmed = NULL;
8178         }
8179 
8180         /*
8181          * This search will temporarily hide the confirmed client
8182          * struct while a new client struct is created as the
8183          * unconfirmed one.
8184          */
8185         create = TRUE;
8186         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8187 
8188         ASSERT(newcp != NULL);
8189 
8190         if (newcp == NULL) {
8191                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8192                 rfs4_client_rele(cp_confirmed);
8193                 goto out;
8194         }
8195 
8196         /*
8197          * If one was not created, then a similar request must be in
8198          * process so release and start over with this one
8199          */
8200         if (create != TRUE) {
8201                 rfs4_client_rele(newcp);
8202                 if (cp_confirmed)
8203                         rfs4_client_rele(cp_confirmed);
8204                 goto retry;
8205         }
8206 
8207         *cs->statusp = res->status = NFS4_OK;
8208         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8209         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8210             newcp->rc_confirm_verf;
8211         /* Setup callback information; CB_NULL confirmation later */
8212         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8213 
8214         newcp->rc_cp_confirmed = cp_confirmed;
8215 
8216         rfs4_client_rele(newcp);
8217 
8218 out:
8219         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8220             SETCLIENTID4res *, res);
8221 }
8222 
8223 /*ARGSUSED*/
8224 void
8225 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8226     struct svc_req *req, struct compound_state *cs)
8227 {
8228         SETCLIENTID_CONFIRM4args *args =
8229             &argop->nfs_argop4_u.opsetclientid_confirm;
8230         SETCLIENTID_CONFIRM4res *res =
8231             &resop->nfs_resop4_u.opsetclientid_confirm;
8232         rfs4_client_t *cp, *cptoclose = NULL;
8233         nfs4_srv_t *nsrv4;
8234 
8235         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8236             struct compound_state *, cs,
8237             SETCLIENTID_CONFIRM4args *, args);
8238 
8239         nsrv4 = nfs4_get_srv();
8240         *cs->statusp = res->status = NFS4_OK;
8241 
8242         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8243 
8244         if (cp == NULL) {
8245                 *cs->statusp = res->status =
8246                     rfs4_check_clientid(&args->clientid, 1);
8247                 goto out;
8248         }
8249 
8250         if (!creds_ok(cp, req, cs)) {
8251                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8252                 rfs4_client_rele(cp);
8253                 goto out;
8254         }
8255 
8256         /* If the verifier doesn't match, the record doesn't match */
8257         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8258                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8259                 rfs4_client_rele(cp);
8260                 goto out;
8261         }
8262 
8263         rfs4_dbe_lock(cp->rc_dbe);
8264         cp->rc_need_confirm = FALSE;
8265         if (cp->rc_cp_confirmed) {
8266                 cptoclose = cp->rc_cp_confirmed;
8267                 cptoclose->rc_ss_remove = 1;
8268                 cp->rc_cp_confirmed = NULL;
8269         }
8270 
8271         /*
8272          * Update the client's associated server instance, if it's changed
8273          * since the client was created.
8274          */
8275         if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8276                 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8277 
8278         /*
8279          * Record clientid in stable storage.
8280          * Must be done after server instance has been assigned.
8281          */
8282         rfs4_ss_clid(nsrv4, cp);
8283 
8284         rfs4_dbe_unlock(cp->rc_dbe);
8285 
8286         if (cptoclose)
8287                 /* don't need to rele, client_close does it */
8288                 rfs4_client_close(cptoclose);
8289 
8290         /* If needed, initiate CB_NULL call for callback path */
8291         rfs4_deleg_cb_check(cp);
8292         rfs4_update_lease(cp);
8293 
8294         /*
8295          * Check to see if client can perform reclaims
8296          */
8297         rfs4_ss_chkclid(nsrv4, cp);
8298 
8299         rfs4_client_rele(cp);
8300 
8301 out:
8302         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8303             struct compound_state *, cs,
8304             SETCLIENTID_CONFIRM4 *, res);
8305 }
8306 
8307 
8308 /*ARGSUSED*/
8309 void
8310 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8311     struct svc_req *req, struct compound_state *cs)
8312 {
8313         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8314         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8315         rfs4_state_t *sp;
8316         nfsstat4 status;
8317 
8318         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8319             CLOSE4args *, args);
8320 
8321         if (cs->vp == NULL) {
8322                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8323                 goto out;
8324         }
8325 
8326         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8327         if (status != NFS4_OK) {
8328                 *cs->statusp = resp->status = status;
8329                 goto out;
8330         }
8331 
8332         /* Ensure specified filehandle matches */
8333         if (cs->vp != sp->rs_finfo->rf_vp) {
8334                 rfs4_state_rele(sp);
8335                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8336                 goto out;
8337         }
8338 
8339         /* hold off other access to open_owner while we tinker */
8340         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8341 
8342         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8343         case NFS4_CHECK_STATEID_OKAY:
8344                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8345                     resop) != NFS4_CHKSEQ_OKAY) {
8346                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8347                         goto end;
8348                 }
8349                 break;
8350         case NFS4_CHECK_STATEID_OLD:
8351                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8352                 goto end;
8353         case NFS4_CHECK_STATEID_BAD:
8354                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8355                 goto end;
8356         case NFS4_CHECK_STATEID_EXPIRED:
8357                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8358                 goto end;
8359         case NFS4_CHECK_STATEID_CLOSED:
8360                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8361                 goto end;
8362         case NFS4_CHECK_STATEID_UNCONFIRMED:
8363                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8364                 goto end;
8365         case NFS4_CHECK_STATEID_REPLAY:
8366                 /* Check the sequence id for the open owner */
8367                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8368                     resop)) {
8369                 case NFS4_CHKSEQ_OKAY:
8370                         /*
8371                          * This is replayed stateid; if seqid matches
8372                          * next expected, then client is using wrong seqid.
8373                          */
8374                         /* FALL THROUGH */
8375                 case NFS4_CHKSEQ_BAD:
8376                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8377                         goto end;
8378                 case NFS4_CHKSEQ_REPLAY:
8379                         /*
8380                          * Note this case is the duplicate case so
8381                          * resp->status is already set.
8382                          */
8383                         *cs->statusp = resp->status;
8384                         rfs4_update_lease(sp->rs_owner->ro_client);
8385                         goto end;
8386                 }
8387                 break;
8388         default:
8389                 ASSERT(FALSE);
8390                 break;
8391         }
8392 
8393         rfs4_dbe_lock(sp->rs_dbe);
8394 
8395         /* Update the stateid. */
8396         next_stateid(&sp->rs_stateid);
8397         resp->open_stateid = sp->rs_stateid.stateid;
8398 
8399         rfs4_dbe_unlock(sp->rs_dbe);
8400 
8401         rfs4_update_lease(sp->rs_owner->ro_client);
8402         rfs4_update_open_sequence(sp->rs_owner);
8403         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8404 
8405         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8406 
8407         *cs->statusp = resp->status = status;
8408 
8409 end:
8410         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8411         rfs4_state_rele(sp);
8412 out:
8413         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8414             CLOSE4res *, resp);
8415 }
8416 
8417 /*
8418  * Manage the counts on the file struct and close all file locks
8419  */
8420 /*ARGSUSED*/
8421 void
8422 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8423     bool_t close_of_client)
8424 {
8425         rfs4_file_t *fp = sp->rs_finfo;
8426         rfs4_lo_state_t *lsp;
8427         int fflags = 0;
8428 
8429         /*
8430          * If this call is part of the larger closing down of client
8431          * state then it is just easier to release all locks
8432          * associated with this client instead of going through each
8433          * individual file and cleaning locks there.
8434          */
8435         if (close_of_client) {
8436                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8437                     !list_is_empty(&sp->rs_lostatelist) &&
8438                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8439                         /* Is the PxFS kernel module loaded? */
8440                         if (lm_remove_file_locks != NULL) {
8441                                 int new_sysid;
8442 
8443                                 /* Encode the cluster nodeid in new sysid */
8444                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8445                                 lm_set_nlmid_flk(&new_sysid);
8446 
8447                                 /*
8448                                  * This PxFS routine removes file locks for a
8449                                  * client over all nodes of a cluster.
8450                                  */
8451                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8452                                     "lm_remove_file_locks(sysid=0x%x)\n",
8453                                     new_sysid));
8454                                 (*lm_remove_file_locks)(new_sysid);
8455                         } else {
8456                                 struct flock64 flk;
8457 
8458                                 /* Release all locks for this client */
8459                                 flk.l_type = F_UNLKSYS;
8460                                 flk.l_whence = 0;
8461                                 flk.l_start = 0;
8462                                 flk.l_len = 0;
8463                                 flk.l_sysid =
8464                                     sp->rs_owner->ro_client->rc_sysidt;
8465                                 flk.l_pid = 0;
8466                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8467                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8468                                     (u_offset_t)0, NULL, CRED(), NULL);
8469                         }
8470 
8471                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8472                 }
8473         }
8474 
8475         /*
8476          * Release all locks on this file by this lock owner or at
8477          * least mark the locks as having been released
8478          */
8479         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8480             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8481                 lsp->rls_locks_cleaned = TRUE;
8482 
8483                 /* Was this already taken care of above? */
8484                 if (!close_of_client &&
8485                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8486                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8487                             lsp->rls_locker->rl_pid,
8488                             lsp->rls_locker->rl_client->rc_sysidt);
8489         }
8490 
8491         /*
8492          * Release any shrlocks associated with this open state ID.
8493          * This must be done before the rfs4_state gets marked closed.
8494          */
8495         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8496                 (void) rfs4_unshare(sp);
8497 
8498         if (sp->rs_open_access) {
8499                 rfs4_dbe_lock(fp->rf_dbe);
8500 
8501                 /*
8502                  * Decrement the count for each access and deny bit that this
8503                  * state has contributed to the file.
8504                  * If the file counts go to zero
8505                  * clear the appropriate bit in the appropriate mask.
8506                  */
8507                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8508                         fp->rf_access_read--;
8509                         fflags |= FREAD;
8510                         if (fp->rf_access_read == 0)
8511                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8512                 }
8513                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8514                         fp->rf_access_write--;
8515                         fflags |= FWRITE;
8516                         if (fp->rf_access_write == 0)
8517                                 fp->rf_share_access &=
8518                                     ~OPEN4_SHARE_ACCESS_WRITE;
8519                 }
8520                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8521                         fp->rf_deny_read--;
8522                         if (fp->rf_deny_read == 0)
8523                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8524                 }
8525                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8526                         fp->rf_deny_write--;
8527                         if (fp->rf_deny_write == 0)
8528                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8529                 }
8530 
8531                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8532 
8533                 rfs4_dbe_unlock(fp->rf_dbe);
8534 
8535                 sp->rs_open_access = 0;
8536                 sp->rs_open_deny = 0;
8537         }
8538 }
8539 
8540 /*
8541  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8542  */
8543 static nfsstat4
8544 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8545 {
8546         rfs4_lockowner_t *lo;
8547         rfs4_client_t *cp;
8548         uint32_t len;
8549 
8550         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8551         if (lo != NULL) {
8552                 cp = lo->rl_client;
8553                 if (rfs4_lease_expired(cp)) {
8554                         rfs4_lockowner_rele(lo);
8555                         rfs4_dbe_hold(cp->rc_dbe);
8556                         rfs4_client_close(cp);
8557                         return (NFS4ERR_EXPIRED);
8558                 }
8559                 dp->owner.clientid = lo->rl_owner.clientid;
8560                 len = lo->rl_owner.owner_len;
8561                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8562                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8563                 dp->owner.owner_len = len;
8564                 rfs4_lockowner_rele(lo);
8565                 goto finish;
8566         }
8567 
8568         /*
8569          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8570          * of the client id contain the boot time for a NFS4 lock. So we
8571          * fabricate and identity by setting clientid to the sysid, and
8572          * the lock owner to the pid.
8573          */
8574         dp->owner.clientid = flk->l_sysid;
8575         len = sizeof (pid_t);
8576         dp->owner.owner_len = len;
8577         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8578         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8579 finish:
8580         dp->offset = flk->l_start;
8581         dp->length = flk->l_len;
8582 
8583         if (flk->l_type == F_RDLCK)
8584                 dp->locktype = READ_LT;
8585         else if (flk->l_type == F_WRLCK)
8586                 dp->locktype = WRITE_LT;
8587         else
8588                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8589 
8590         return (NFS4_OK);
8591 }
8592 
8593 /*
8594  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8595  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8596  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8597  * for that (obviously); they are sending the LOCK requests with some delays
8598  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8599  * locking and delay implementation at the client side.
8600  *
8601  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8602  * fast retries on its own (the for loop below) in a hope the lock will be
8603  * available soon.  And if not, the client won't need to resend the LOCK
8604  * requests so fast to check the lock availability.  This basically saves some
8605  * network traffic and tries to make sure the client gets the lock ASAP.
8606  */
8607 static int
8608 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8609 {
8610         int error;
8611         struct flock64 flk;
8612         int i;
8613         clock_t delaytime;
8614         int cmd;
8615         int spin_cnt = 0;
8616 
8617         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8618 retry:
8619         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8620 
8621         for (i = 0; i < rfs4_maxlock_tries; i++) {
8622                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8623                 error = VOP_FRLOCK(vp, cmd,
8624                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8625 
8626                 if (error != EAGAIN && error != EACCES)
8627                         break;
8628 
8629                 if (i < rfs4_maxlock_tries - 1) {
8630                         delay(delaytime);
8631                         delaytime *= 2;
8632                 }
8633         }
8634 
8635         if (error == EAGAIN || error == EACCES) {
8636                 /* Get the owner of the lock */
8637                 flk = *flock;
8638                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8639                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8640                     NULL) == 0) {
8641                         /*
8642                          * There's a race inherent in the current VOP_FRLOCK
8643                          * design where:
8644                          * a: "other guy" takes a lock that conflicts with a
8645                          * lock we want
8646                          * b: we attempt to take our lock (non-blocking) and
8647                          * the attempt fails.
8648                          * c: "other guy" releases the conflicting lock
8649                          * d: we ask what lock conflicts with the lock we want,
8650                          * getting F_UNLCK (no lock blocks us)
8651                          *
8652                          * If we retry the non-blocking lock attempt in this
8653                          * case (restart at step 'b') there's some possibility
8654                          * that many such attempts might fail.  However a test
8655                          * designed to actually provoke this race shows that
8656                          * the vast majority of cases require no retry, and
8657                          * only a few took as many as three retries.  Here's
8658                          * the test outcome:
8659                          *
8660                          *         number of retries    how many times we needed
8661                          *                              that many retries
8662                          *         0                    79461
8663                          *         1                      862
8664                          *         2                       49
8665                          *         3                        5
8666                          *
8667                          * Given those empirical results, we arbitrarily limit
8668                          * the retry count to ten.
8669                          *
8670                          * If we actually make to ten retries and give up,
8671                          * nothing catastrophic happens, but we're unable to
8672                          * return the information about the conflicting lock to
8673                          * the NFS client.  That's an acceptable trade off vs.
8674                          * letting this retry loop run forever.
8675                          */
8676                         if (flk.l_type == F_UNLCK) {
8677                                 if (spin_cnt++ < 10) {
8678                                         /* No longer locked, retry */
8679                                         goto retry;
8680                                 }
8681                         } else {
8682                                 *flock = flk;
8683                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8684                                     F_GETLK, &flk);
8685                         }
8686                 }
8687         }
8688 
8689         return (error);
8690 }
8691 
8692 /*ARGSUSED*/
8693 static nfsstat4
8694 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8695     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8696 {
8697         nfsstat4 status;
8698         rfs4_lockowner_t *lo = lsp->rls_locker;
8699         rfs4_state_t *sp = lsp->rls_state;
8700         struct flock64 flock;
8701         int16_t ltype;
8702         int flag;
8703         int error;
8704         sysid_t sysid;
8705         LOCK4res *lres;
8706         vnode_t *vp;
8707 
8708         if (rfs4_lease_expired(lo->rl_client)) {
8709                 return (NFS4ERR_EXPIRED);
8710         }
8711 
8712         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8713                 return (status);
8714 
8715         /* Check for zero length. To lock to end of file use all ones for V4 */
8716         if (length == 0)
8717                 return (NFS4ERR_INVAL);
8718         else if (length == (length4)(~0))
8719                 length = 0;             /* Posix to end of file  */
8720 
8721 retry:
8722         rfs4_dbe_lock(sp->rs_dbe);
8723         if (sp->rs_closed == TRUE) {
8724                 rfs4_dbe_unlock(sp->rs_dbe);
8725                 return (NFS4ERR_OLD_STATEID);
8726         }
8727 
8728         if (resop->resop != OP_LOCKU) {
8729                 switch (locktype) {
8730                 case READ_LT:
8731                 case READW_LT:
8732                         if ((sp->rs_share_access
8733                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8734                                 rfs4_dbe_unlock(sp->rs_dbe);
8735 
8736                                 return (NFS4ERR_OPENMODE);
8737                         }
8738                         ltype = F_RDLCK;
8739                         break;
8740                 case WRITE_LT:
8741                 case WRITEW_LT:
8742                         if ((sp->rs_share_access
8743                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8744                                 rfs4_dbe_unlock(sp->rs_dbe);
8745 
8746                                 return (NFS4ERR_OPENMODE);
8747                         }
8748                         ltype = F_WRLCK;
8749                         break;
8750                 }
8751         } else
8752                 ltype = F_UNLCK;
8753 
8754         flock.l_type = ltype;
8755         flock.l_whence = 0;             /* SEEK_SET */
8756         flock.l_start = offset;
8757         flock.l_len = length;
8758         flock.l_sysid = sysid;
8759         flock.l_pid = lsp->rls_locker->rl_pid;
8760 
8761         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8762         if (flock.l_len < 0 || flock.l_start < 0) {
8763                 rfs4_dbe_unlock(sp->rs_dbe);
8764                 return (NFS4ERR_INVAL);
8765         }
8766 
8767         /*
8768          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8769          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8770          */
8771         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8772 
8773         vp = sp->rs_finfo->rf_vp;
8774         VN_HOLD(vp);
8775 
8776         /*
8777          * We need to unlock sp before we call the underlying filesystem to
8778          * acquire the file lock.
8779          */
8780         rfs4_dbe_unlock(sp->rs_dbe);
8781 
8782         error = setlock(vp, &flock, flag, cred);
8783 
8784         /*
8785          * Make sure the file is still open.  In a case the file was closed in
8786          * the meantime, clean the lock we acquired using the setlock() call
8787          * above, and return the appropriate error.
8788          */
8789         rfs4_dbe_lock(sp->rs_dbe);
8790         if (sp->rs_closed == TRUE) {
8791                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8792                 rfs4_dbe_unlock(sp->rs_dbe);
8793 
8794                 VN_RELE(vp);
8795 
8796                 return (NFS4ERR_OLD_STATEID);
8797         }
8798         rfs4_dbe_unlock(sp->rs_dbe);
8799 
8800         VN_RELE(vp);
8801 
8802         if (error == 0) {
8803                 rfs4_dbe_lock(lsp->rls_dbe);
8804                 next_stateid(&lsp->rls_lockid);
8805                 rfs4_dbe_unlock(lsp->rls_dbe);
8806         }
8807 
8808         /*
8809          * N.B. We map error values to nfsv4 errors. This is differrent
8810          * than puterrno4 routine.
8811          */
8812         switch (error) {
8813         case 0:
8814                 status = NFS4_OK;
8815                 break;
8816         case EAGAIN:
8817         case EACCES:            /* Old value */
8818                 /* Can only get here if op is OP_LOCK */
8819                 ASSERT(resop->resop == OP_LOCK);
8820                 lres = &resop->nfs_resop4_u.oplock;
8821                 status = NFS4ERR_DENIED;
8822                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8823                     == NFS4ERR_EXPIRED)
8824                         goto retry;
8825                 break;
8826         case ENOLCK:
8827                 status = NFS4ERR_DELAY;
8828                 break;
8829         case EOVERFLOW:
8830                 status = NFS4ERR_INVAL;
8831                 break;
8832         case EINVAL:
8833                 status = NFS4ERR_NOTSUPP;
8834                 break;
8835         default:
8836                 status = NFS4ERR_SERVERFAULT;
8837                 break;
8838         }
8839 
8840         return (status);
8841 }
8842 
8843 /*ARGSUSED*/
8844 void
8845 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8846     struct svc_req *req, struct compound_state *cs)
8847 {
8848         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8849         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8850         nfsstat4 status;
8851         stateid4 *stateid;
8852         rfs4_lockowner_t *lo;
8853         rfs4_client_t *cp;
8854         rfs4_state_t *sp = NULL;
8855         rfs4_lo_state_t *lsp = NULL;
8856         bool_t ls_sw_held = FALSE;
8857         bool_t create = TRUE;
8858         bool_t lcreate = TRUE;
8859         bool_t dup_lock = FALSE;
8860         int rc;
8861 
8862         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8863             LOCK4args *, args);
8864 
8865         if (cs->vp == NULL) {
8866                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8867                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8868                     cs, LOCK4res *, resp);
8869                 return;
8870         }
8871 
8872         if (args->locker.new_lock_owner) {
8873                 /* Create a new lockowner for this instance */
8874                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8875 
8876                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8877 
8878                 stateid = &olo->open_stateid;
8879                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8880                 if (status != NFS4_OK) {
8881                         NFS4_DEBUG(rfs4_debug,
8882                             (CE_NOTE, "Get state failed in lock %d", status));
8883                         *cs->statusp = resp->status = status;
8884                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8885                             cs, LOCK4res *, resp);
8886                         return;
8887                 }
8888 
8889                 /* Ensure specified filehandle matches */
8890                 if (cs->vp != sp->rs_finfo->rf_vp) {
8891                         rfs4_state_rele(sp);
8892                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8893                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8894                             cs, LOCK4res *, resp);
8895                         return;
8896                 }
8897 
8898                 /* hold off other access to open_owner while we tinker */
8899                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8900 
8901                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8902                 case NFS4_CHECK_STATEID_OLD:
8903                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8904                         goto end;
8905                 case NFS4_CHECK_STATEID_BAD:
8906                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8907                         goto end;
8908                 case NFS4_CHECK_STATEID_EXPIRED:
8909                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8910                         goto end;
8911                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8912                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8913                         goto end;
8914                 case NFS4_CHECK_STATEID_CLOSED:
8915                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8916                         goto end;
8917                 case NFS4_CHECK_STATEID_OKAY:
8918                 case NFS4_CHECK_STATEID_REPLAY:
8919                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8920                             sp->rs_owner, resop)) {
8921                         case NFS4_CHKSEQ_OKAY:
8922                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8923                                         break;
8924                                 /*
8925                                  * This is replayed stateid; if seqid
8926                                  * matches next expected, then client
8927                                  * is using wrong seqid.
8928                                  */
8929                                 /* FALLTHROUGH */
8930                         case NFS4_CHKSEQ_BAD:
8931                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8932                                 goto end;
8933                         case NFS4_CHKSEQ_REPLAY:
8934                                 /* This is a duplicate LOCK request */
8935                                 dup_lock = TRUE;
8936 
8937                                 /*
8938                                  * For a duplicate we do not want to
8939                                  * create a new lockowner as it should
8940                                  * already exist.
8941                                  * Turn off the lockowner create flag.
8942                                  */
8943                                 lcreate = FALSE;
8944                         }
8945                         break;
8946                 }
8947 
8948                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8949                 if (lo == NULL) {
8950                         NFS4_DEBUG(rfs4_debug,
8951                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8952                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8953                         goto end;
8954                 }
8955 
8956                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8957                 if (lsp == NULL) {
8958                         rfs4_update_lease(sp->rs_owner->ro_client);
8959                         /*
8960                          * Only update theh open_seqid if this is not
8961                          * a duplicate request
8962                          */
8963                         if (dup_lock == FALSE) {
8964                                 rfs4_update_open_sequence(sp->rs_owner);
8965                         }
8966 
8967                         NFS4_DEBUG(rfs4_debug,
8968                             (CE_NOTE, "rfs4_op_lock: no state"));
8969                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8970                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8971                         rfs4_lockowner_rele(lo);
8972                         goto end;
8973                 }
8974 
8975                 /*
8976                  * This is the new_lock_owner branch and the client is
8977                  * supposed to be associating a new lock_owner with
8978                  * the open file at this point.  If we find that a
8979                  * lock_owner/state association already exists and a
8980                  * successful LOCK request was returned to the client,
8981                  * an error is returned to the client since this is
8982                  * not appropriate.  The client should be using the
8983                  * existing lock_owner branch.
8984                  */
8985                 if (dup_lock == FALSE && create == FALSE) {
8986                         if (lsp->rls_lock_completed == TRUE) {
8987                                 *cs->statusp =
8988                                     resp->status = NFS4ERR_BAD_SEQID;
8989                                 rfs4_lockowner_rele(lo);
8990                                 goto end;
8991                         }
8992                 }
8993 
8994                 rfs4_update_lease(sp->rs_owner->ro_client);
8995 
8996                 /*
8997                  * Only update theh open_seqid if this is not
8998                  * a duplicate request
8999                  */
9000                 if (dup_lock == FALSE) {
9001                         rfs4_update_open_sequence(sp->rs_owner);
9002                 }
9003 
9004                 /*
9005                  * If this is a duplicate lock request, just copy the
9006                  * previously saved reply and return.
9007                  */
9008                 if (dup_lock == TRUE) {
9009                         /* verify that lock_seqid's match */
9010                         if (lsp->rls_seqid != olo->lock_seqid) {
9011                                 NFS4_DEBUG(rfs4_debug,
9012                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9013                                     "lsp->seqid=%d old->seqid=%d",
9014                                     lsp->rls_seqid, olo->lock_seqid));
9015                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9016                         } else {
9017                                 rfs4_copy_reply(resop, &lsp->rls_reply);
9018                                 /*
9019                                  * Make sure to copy the just
9020                                  * retrieved reply status into the
9021                                  * overall compound status
9022                                  */
9023                                 *cs->statusp = resp->status;
9024                         }
9025                         rfs4_lockowner_rele(lo);
9026                         goto end;
9027                 }
9028 
9029                 rfs4_dbe_lock(lsp->rls_dbe);
9030 
9031                 /* Make sure to update the lock sequence id */
9032                 lsp->rls_seqid = olo->lock_seqid;
9033 
9034                 NFS4_DEBUG(rfs4_debug,
9035                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9036 
9037                 /*
9038                  * This is used to signify the newly created lockowner
9039                  * stateid and its sequence number.  The checks for
9040                  * sequence number and increment don't occur on the
9041                  * very first lock request for a lockowner.
9042                  */
9043                 lsp->rls_skip_seqid_check = TRUE;
9044 
9045                 /* hold off other access to lsp while we tinker */
9046                 rfs4_sw_enter(&lsp->rls_sw);
9047                 ls_sw_held = TRUE;
9048 
9049                 rfs4_dbe_unlock(lsp->rls_dbe);
9050 
9051                 rfs4_lockowner_rele(lo);
9052         } else {
9053                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9054                 /* get lsp and hold the lock on the underlying file struct */
9055                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9056                     != NFS4_OK) {
9057                         *cs->statusp = resp->status = status;
9058                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9059                             cs, LOCK4res *, resp);
9060                         return;
9061                 }
9062                 create = FALSE; /* We didn't create lsp */
9063 
9064                 /* Ensure specified filehandle matches */
9065                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9066                         rfs4_lo_state_rele(lsp, TRUE);
9067                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9068                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9069                             cs, LOCK4res *, resp);
9070                         return;
9071                 }
9072 
9073                 /* hold off other access to lsp while we tinker */
9074                 rfs4_sw_enter(&lsp->rls_sw);
9075                 ls_sw_held = TRUE;
9076 
9077                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9078                 /*
9079                  * The stateid looks like it was okay (expected to be
9080                  * the next one)
9081                  */
9082                 case NFS4_CHECK_STATEID_OKAY:
9083                         /*
9084                          * The sequence id is now checked.  Determine
9085                          * if this is a replay or if it is in the
9086                          * expected (next) sequence.  In the case of a
9087                          * replay, there are two replay conditions
9088                          * that may occur.  The first is the normal
9089                          * condition where a LOCK is done with a
9090                          * NFS4_OK response and the stateid is
9091                          * updated.  That case is handled below when
9092                          * the stateid is identified as a REPLAY.  The
9093                          * second is the case where an error is
9094                          * returned, like NFS4ERR_DENIED, and the
9095                          * sequence number is updated but the stateid
9096                          * is not updated.  This second case is dealt
9097                          * with here.  So it may seem odd that the
9098                          * stateid is okay but the sequence id is a
9099                          * replay but it is okay.
9100                          */
9101                         switch (rfs4_check_lock_seqid(
9102                             args->locker.locker4_u.lock_owner.lock_seqid,
9103                             lsp, resop)) {
9104                         case NFS4_CHKSEQ_REPLAY:
9105                                 if (resp->status != NFS4_OK) {
9106                                         /*
9107                                          * Here is our replay and need
9108                                          * to verify that the last
9109                                          * response was an error.
9110                                          */
9111                                         *cs->statusp = resp->status;
9112                                         goto end;
9113                                 }
9114                                 /*
9115                                  * This is done since the sequence id
9116                                  * looked like a replay but it didn't
9117                                  * pass our check so a BAD_SEQID is
9118                                  * returned as a result.
9119                                  */
9120                                 /*FALLTHROUGH*/
9121                         case NFS4_CHKSEQ_BAD:
9122                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9123                                 goto end;
9124                         case NFS4_CHKSEQ_OKAY:
9125                                 /* Everything looks okay move ahead */
9126                                 break;
9127                         }
9128                         break;
9129                 case NFS4_CHECK_STATEID_OLD:
9130                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9131                         goto end;
9132                 case NFS4_CHECK_STATEID_BAD:
9133                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9134                         goto end;
9135                 case NFS4_CHECK_STATEID_EXPIRED:
9136                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9137                         goto end;
9138                 case NFS4_CHECK_STATEID_CLOSED:
9139                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9140                         goto end;
9141                 case NFS4_CHECK_STATEID_REPLAY:
9142                         switch (rfs4_check_lock_seqid(
9143                             args->locker.locker4_u.lock_owner.lock_seqid,
9144                             lsp, resop)) {
9145                         case NFS4_CHKSEQ_OKAY:
9146                                 /*
9147                                  * This is a replayed stateid; if
9148                                  * seqid matches the next expected,
9149                                  * then client is using wrong seqid.
9150                                  */
9151                         case NFS4_CHKSEQ_BAD:
9152                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9153                                 goto end;
9154                         case NFS4_CHKSEQ_REPLAY:
9155                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9156                                 *cs->statusp = status = resp->status;
9157                                 goto end;
9158                         }
9159                         break;
9160                 default:
9161                         ASSERT(FALSE);
9162                         break;
9163                 }
9164 
9165                 rfs4_update_lock_sequence(lsp);
9166                 rfs4_update_lease(lsp->rls_locker->rl_client);
9167         }
9168 
9169         /*
9170          * NFS4 only allows locking on regular files, so
9171          * verify type of object.
9172          */
9173         if (cs->vp->v_type != VREG) {
9174                 if (cs->vp->v_type == VDIR)
9175                         status = NFS4ERR_ISDIR;
9176                 else
9177                         status = NFS4ERR_INVAL;
9178                 goto out;
9179         }
9180 
9181         cp = lsp->rls_state->rs_owner->ro_client;
9182 
9183         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9184                 status = NFS4ERR_GRACE;
9185                 goto out;
9186         }
9187 
9188         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9189                 status = NFS4ERR_NO_GRACE;
9190                 goto out;
9191         }
9192 
9193         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9194                 status = NFS4ERR_NO_GRACE;
9195                 goto out;
9196         }
9197 
9198         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9199                 cs->deleg = TRUE;
9200 
9201         status = rfs4_do_lock(lsp, args->locktype,
9202             args->offset, args->length, cs->cr, resop);
9203 
9204 out:
9205         lsp->rls_skip_seqid_check = FALSE;
9206 
9207         *cs->statusp = resp->status = status;
9208 
9209         if (status == NFS4_OK) {
9210                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9211                 lsp->rls_lock_completed = TRUE;
9212         }
9213         /*
9214          * Only update the "OPEN" response here if this was a new
9215          * lock_owner
9216          */
9217         if (sp)
9218                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9219 
9220         rfs4_update_lock_resp(lsp, resop);
9221 
9222 end:
9223         if (lsp) {
9224                 if (ls_sw_held)
9225                         rfs4_sw_exit(&lsp->rls_sw);
9226                 /*
9227                  * If an sp obtained, then the lsp does not represent
9228                  * a lock on the file struct.
9229                  */
9230                 if (sp != NULL)
9231                         rfs4_lo_state_rele(lsp, FALSE);
9232                 else
9233                         rfs4_lo_state_rele(lsp, TRUE);
9234         }
9235         if (sp) {
9236                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9237                 rfs4_state_rele(sp);
9238         }
9239 
9240         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9241             LOCK4res *, resp);
9242 }
9243 
9244 /* free function for LOCK/LOCKT */
9245 static void
9246 lock_denied_free(nfs_resop4 *resop)
9247 {
9248         LOCK4denied *dp = NULL;
9249 
9250         switch (resop->resop) {
9251         case OP_LOCK:
9252                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9253                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9254                 break;
9255         case OP_LOCKT:
9256                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9257                         dp = &resop->nfs_resop4_u.oplockt.denied;
9258                 break;
9259         default:
9260                 break;
9261         }
9262 
9263         if (dp)
9264                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9265 }
9266 
9267 /*ARGSUSED*/
9268 void
9269 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9270     struct svc_req *req, struct compound_state *cs)
9271 {
9272         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9273         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9274         nfsstat4 status;
9275         stateid4 *stateid = &args->lock_stateid;
9276         rfs4_lo_state_t *lsp;
9277 
9278         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9279             LOCKU4args *, args);
9280 
9281         if (cs->vp == NULL) {
9282                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9283                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9284                     LOCKU4res *, resp);
9285                 return;
9286         }
9287 
9288         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9289                 *cs->statusp = resp->status = status;
9290                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9291                     LOCKU4res *, resp);
9292                 return;
9293         }
9294 
9295         /* Ensure specified filehandle matches */
9296         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9297                 rfs4_lo_state_rele(lsp, TRUE);
9298                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9299                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9300                     LOCKU4res *, resp);
9301                 return;
9302         }
9303 
9304         /* hold off other access to lsp while we tinker */
9305         rfs4_sw_enter(&lsp->rls_sw);
9306 
9307         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9308         case NFS4_CHECK_STATEID_OKAY:
9309                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9310                     != NFS4_CHKSEQ_OKAY) {
9311                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9312                         goto end;
9313                 }
9314                 break;
9315         case NFS4_CHECK_STATEID_OLD:
9316                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9317                 goto end;
9318         case NFS4_CHECK_STATEID_BAD:
9319                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9320                 goto end;
9321         case NFS4_CHECK_STATEID_EXPIRED:
9322                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9323                 goto end;
9324         case NFS4_CHECK_STATEID_CLOSED:
9325                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9326                 goto end;
9327         case NFS4_CHECK_STATEID_REPLAY:
9328                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9329                 case NFS4_CHKSEQ_OKAY:
9330                                 /*
9331                                  * This is a replayed stateid; if
9332                                  * seqid matches the next expected,
9333                                  * then client is using wrong seqid.
9334                                  */
9335                 case NFS4_CHKSEQ_BAD:
9336                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9337                         goto end;
9338                 case NFS4_CHKSEQ_REPLAY:
9339                         rfs4_update_lease(lsp->rls_locker->rl_client);
9340                         *cs->statusp = status = resp->status;
9341                         goto end;
9342                 }
9343                 break;
9344         default:
9345                 ASSERT(FALSE);
9346                 break;
9347         }
9348 
9349         rfs4_update_lock_sequence(lsp);
9350         rfs4_update_lease(lsp->rls_locker->rl_client);
9351 
9352         /*
9353          * NFS4 only allows locking on regular files, so
9354          * verify type of object.
9355          */
9356         if (cs->vp->v_type != VREG) {
9357                 if (cs->vp->v_type == VDIR)
9358                         status = NFS4ERR_ISDIR;
9359                 else
9360                         status = NFS4ERR_INVAL;
9361                 goto out;
9362         }
9363 
9364         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9365                 status = NFS4ERR_GRACE;
9366                 goto out;
9367         }
9368 
9369         status = rfs4_do_lock(lsp, args->locktype,
9370             args->offset, args->length, cs->cr, resop);
9371 
9372 out:
9373         *cs->statusp = resp->status = status;
9374 
9375         if (status == NFS4_OK)
9376                 resp->lock_stateid = lsp->rls_lockid.stateid;
9377 
9378         rfs4_update_lock_resp(lsp, resop);
9379 
9380 end:
9381         rfs4_sw_exit(&lsp->rls_sw);
9382         rfs4_lo_state_rele(lsp, TRUE);
9383 
9384         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9385             LOCKU4res *, resp);
9386 }
9387 
9388 /*
9389  * LOCKT is a best effort routine, the client can not be guaranteed that
9390  * the status return is still in effect by the time the reply is received.
9391  * They are numerous race conditions in this routine, but we are not required
9392  * and can not be accurate.
9393  */
9394 /*ARGSUSED*/
9395 void
9396 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9397     struct svc_req *req, struct compound_state *cs)
9398 {
9399         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9400         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9401         rfs4_lockowner_t *lo;
9402         rfs4_client_t *cp;
9403         bool_t create = FALSE;
9404         struct flock64 flk;
9405         int error;
9406         int flag = FREAD | FWRITE;
9407         int ltype;
9408         length4 posix_length;
9409         sysid_t sysid;
9410         pid_t pid;
9411 
9412         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9413             LOCKT4args *, args);
9414 
9415         if (cs->vp == NULL) {
9416                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9417                 goto out;
9418         }
9419 
9420         /*
9421          * NFS4 only allows locking on regular files, so
9422          * verify type of object.
9423          */
9424         if (cs->vp->v_type != VREG) {
9425                 if (cs->vp->v_type == VDIR)
9426                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9427                 else
9428                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9429                 goto out;
9430         }
9431 
9432         /*
9433          * Check out the clientid to ensure the server knows about it
9434          * so that we correctly inform the client of a server reboot.
9435          */
9436         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9437             == NULL) {
9438                 *cs->statusp = resp->status =
9439                     rfs4_check_clientid(&args->owner.clientid, 0);
9440                 goto out;
9441         }
9442         if (rfs4_lease_expired(cp)) {
9443                 rfs4_client_close(cp);
9444                 /*
9445                  * Protocol doesn't allow returning NFS4ERR_STALE as
9446                  * other operations do on this check so STALE_CLIENTID
9447                  * is returned instead
9448                  */
9449                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9450                 goto out;
9451         }
9452 
9453         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9454                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9455                 rfs4_client_rele(cp);
9456                 goto out;
9457         }
9458         rfs4_client_rele(cp);
9459 
9460         resp->status = NFS4_OK;
9461 
9462         switch (args->locktype) {
9463         case READ_LT:
9464         case READW_LT:
9465                 ltype = F_RDLCK;
9466                 break;
9467         case WRITE_LT:
9468         case WRITEW_LT:
9469                 ltype = F_WRLCK;
9470                 break;
9471         }
9472 
9473         posix_length = args->length;
9474         /* Check for zero length. To lock to end of file use all ones for V4 */
9475         if (posix_length == 0) {
9476                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9477                 goto out;
9478         } else if (posix_length == (length4)(~0)) {
9479                 posix_length = 0;       /* Posix to end of file  */
9480         }
9481 
9482         /* Find or create a lockowner */
9483         lo = rfs4_findlockowner(&args->owner, &create);
9484 
9485         if (lo) {
9486                 pid = lo->rl_pid;
9487                 if ((resp->status =
9488                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9489                         goto err;
9490         } else {
9491                 pid = 0;
9492                 sysid = lockt_sysid;
9493         }
9494 retry:
9495         flk.l_type = ltype;
9496         flk.l_whence = 0;               /* SEEK_SET */
9497         flk.l_start = args->offset;
9498         flk.l_len = posix_length;
9499         flk.l_sysid = sysid;
9500         flk.l_pid = pid;
9501         flag |= F_REMOTELOCK;
9502 
9503         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9504 
9505         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9506         if (flk.l_len < 0 || flk.l_start < 0) {
9507                 resp->status = NFS4ERR_INVAL;
9508                 goto err;
9509         }
9510         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9511             NULL, cs->cr, NULL);
9512 
9513         /*
9514          * N.B. We map error values to nfsv4 errors. This is differrent
9515          * than puterrno4 routine.
9516          */
9517         switch (error) {
9518         case 0:
9519                 if (flk.l_type == F_UNLCK)
9520                         resp->status = NFS4_OK;
9521                 else {
9522                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9523                                 goto retry;
9524                         resp->status = NFS4ERR_DENIED;
9525                 }
9526                 break;
9527         case EOVERFLOW:
9528                 resp->status = NFS4ERR_INVAL;
9529                 break;
9530         case EINVAL:
9531                 resp->status = NFS4ERR_NOTSUPP;
9532                 break;
9533         default:
9534                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9535                     error);
9536                 resp->status = NFS4ERR_SERVERFAULT;
9537                 break;
9538         }
9539 
9540 err:
9541         if (lo)
9542                 rfs4_lockowner_rele(lo);
9543         *cs->statusp = resp->status;
9544 out:
9545         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9546             LOCKT4res *, resp);
9547 }
9548 
9549 int
9550 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9551 {
9552         int err;
9553         int cmd;
9554         vnode_t *vp;
9555         struct shrlock shr;
9556         struct shr_locowner shr_loco;
9557         int fflags = 0;
9558 
9559         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9560         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9561 
9562         if (sp->rs_closed)
9563                 return (NFS4ERR_OLD_STATEID);
9564 
9565         vp = sp->rs_finfo->rf_vp;
9566         ASSERT(vp);
9567 
9568         shr.s_access = shr.s_deny = 0;
9569 
9570         if (access & OPEN4_SHARE_ACCESS_READ) {
9571                 fflags |= FREAD;
9572                 shr.s_access |= F_RDACC;
9573         }
9574         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9575                 fflags |= FWRITE;
9576                 shr.s_access |= F_WRACC;
9577         }
9578         ASSERT(shr.s_access);
9579 
9580         if (deny & OPEN4_SHARE_DENY_READ)
9581                 shr.s_deny |= F_RDDNY;
9582         if (deny & OPEN4_SHARE_DENY_WRITE)
9583                 shr.s_deny |= F_WRDNY;
9584 
9585         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9586         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9587         shr_loco.sl_pid = shr.s_pid;
9588         shr_loco.sl_id = shr.s_sysid;
9589         shr.s_owner = (caddr_t)&shr_loco;
9590         shr.s_own_len = sizeof (shr_loco);
9591 
9592         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9593 
9594         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9595         if (err != 0) {
9596                 if (err == EAGAIN)
9597                         err = NFS4ERR_SHARE_DENIED;
9598                 else
9599                         err = puterrno4(err);
9600                 return (err);
9601         }
9602 
9603         sp->rs_share_access |= access;
9604         sp->rs_share_deny |= deny;
9605 
9606         return (0);
9607 }
9608 
9609 int
9610 rfs4_unshare(rfs4_state_t *sp)
9611 {
9612         int err;
9613         struct shrlock shr;
9614         struct shr_locowner shr_loco;
9615 
9616         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9617 
9618         if (sp->rs_closed || sp->rs_share_access == 0)
9619                 return (0);
9620 
9621         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9622         ASSERT(sp->rs_finfo->rf_vp);
9623 
9624         shr.s_access = shr.s_deny = 0;
9625         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9626         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9627         shr_loco.sl_pid = shr.s_pid;
9628         shr_loco.sl_id = shr.s_sysid;
9629         shr.s_owner = (caddr_t)&shr_loco;
9630         shr.s_own_len = sizeof (shr_loco);
9631 
9632         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9633             NULL);
9634         if (err != 0) {
9635                 err = puterrno4(err);
9636                 return (err);
9637         }
9638 
9639         sp->rs_share_access = 0;
9640         sp->rs_share_deny = 0;
9641 
9642         return (0);
9643 
9644 }
9645 
9646 static int
9647 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9648 {
9649         struct clist    *wcl;
9650         count4          count = rok->data_len;
9651         int             wlist_len;
9652 
9653         wcl = args->wlist;
9654         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9655                 return (FALSE);
9656         }
9657         wcl = args->wlist;
9658         rok->wlist_len = wlist_len;
9659         rok->wlist = wcl;
9660         return (TRUE);
9661 }
9662 
/*
 * Tunable to disable server referrals.  When this is non-zero,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9665 
9666 /*
9667  * Find an NFS record in reparse point data.
9668  * Returns 0 for success and <0 or an errno value on failure.
9669  */
9670 int
9671 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9672 {
9673         int err;
9674         char *stype, *val;
9675         nvlist_t *nvl;
9676         nvpair_t *curr;
9677 
9678         if ((nvl = reparse_init()) == NULL)
9679                 return (-1);
9680 
9681         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9682                 reparse_free(nvl);
9683                 return (err);
9684         }
9685 
9686         curr = NULL;
9687         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9688                 if ((stype = nvpair_name(curr)) == NULL) {
9689                         reparse_free(nvl);
9690                         return (-2);
9691                 }
9692                 if (strncasecmp(stype, "NFS", 3) == 0)
9693                         break;
9694         }
9695 
9696         if ((curr == NULL) ||
9697             (nvpair_value_string(curr, &val))) {
9698                 reparse_free(nvl);
9699                 return (-3);
9700         }
9701         *nvlp = nvl;
9702         *svcp = stype;
9703         *datap = val;
9704         return (0);
9705 }
9706 
9707 int
9708 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9709 {
9710         nvlist_t *nvl;
9711         char *s, *d;
9712 
9713         if (rfs4_no_referrals != 0)
9714                 return (B_FALSE);
9715 
9716         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9717                 return (B_FALSE);
9718 
9719         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9720                 return (B_FALSE);
9721 
9722         reparse_free(nvl);
9723 
9724         return (B_TRUE);
9725 }
9726 
9727 /*
9728  * There is a user-level copy of this routine in ref_subr.c.
9729  * Changes should be kept in sync.
9730  */
9731 static int
9732 nfs4_create_components(char *path, component4 *comp4)
9733 {
9734         int slen, plen, ncomp;
9735         char *ori_path, *nxtc, buf[MAXNAMELEN];
9736 
9737         if (path == NULL)
9738                 return (0);
9739 
9740         plen = strlen(path) + 1;        /* include the terminator */
9741         ori_path = path;
9742         ncomp = 0;
9743 
9744         /* count number of components in the path */
9745         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9746                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9747                         if ((slen = nxtc - path) == 0) {
9748                                 path = nxtc + 1;
9749                                 continue;
9750                         }
9751 
9752                         if (comp4 != NULL) {
9753                                 bcopy(path, buf, slen);
9754                                 buf[slen] = '\0';
9755                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9756                         }
9757 
9758                         ncomp++;        /* 1 valid component */
9759                         path = nxtc + 1;
9760                 }
9761                 if (*nxtc == '\0' || *nxtc == '\n')
9762                         break;
9763         }
9764 
9765         return (ncomp);
9766 }
9767 
9768 /*
9769  * There is a user-level copy of this routine in ref_subr.c.
9770  * Changes should be kept in sync.
9771  */
9772 static int
9773 make_pathname4(char *path, pathname4 *pathname)
9774 {
9775         int ncomp;
9776         component4 *comp4;
9777 
9778         if (pathname == NULL)
9779                 return (0);
9780 
9781         if (path == NULL) {
9782                 pathname->pathname4_val = NULL;
9783                 pathname->pathname4_len = 0;
9784                 return (0);
9785         }
9786 
9787         /* count number of components to alloc buffer */
9788         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9789                 pathname->pathname4_val = NULL;
9790                 pathname->pathname4_len = 0;
9791                 return (0);
9792         }
9793         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9794 
9795         /* copy components into allocated buffer */
9796         ncomp = nfs4_create_components(path, comp4);
9797 
9798         pathname->pathname4_val = comp4;
9799         pathname->pathname4_len = ncomp;
9800 
9801         return (ncomp);
9802 }
9803 
/*
 * fetch_referral() decodes an fs_locations4 through this name; it is simply
 * an alias for the xdr_fattr4_fs_locations routine.
 */
#define xdr_fs_locations4 xdr_fattr4_fs_locations
9805 
9806 fs_locations4 *
9807 fetch_referral(vnode_t *vp, cred_t *cr)
9808 {
9809         nvlist_t *nvl;
9810         char *stype, *sdata;
9811         fs_locations4 *result;
9812         char buf[1024];
9813         size_t bufsize;
9814         XDR xdr;
9815         int err;
9816 
9817         /*
9818          * Check attrs to ensure it's a reparse point
9819          */
9820         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9821                 return (NULL);
9822 
9823         /*
9824          * Look for an NFS record and get the type and data
9825          */
9826         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9827                 return (NULL);
9828 
9829         /*
9830          * With the type and data, upcall to get the referral
9831          */
9832         bufsize = sizeof (buf);
9833         bzero(buf, sizeof (buf));
9834         err = reparse_kderef((const char *)stype, (const char *)sdata,
9835             buf, &bufsize);
9836         reparse_free(nvl);
9837 
9838         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9839             char *, stype, char *, sdata, char *, buf, int, err);
9840         if (err) {
9841                 cmn_err(CE_NOTE,
9842                     "reparsed daemon not running: unable to get referral (%d)",
9843                     err);
9844                 return (NULL);
9845         }
9846 
9847         /*
9848          * We get an XDR'ed record back from the kderef call
9849          */
9850         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9851         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9852         err = xdr_fs_locations4(&xdr, result);
9853         XDR_DESTROY(&xdr);
9854         if (err != TRUE) {
9855                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9856                     int, err);
9857                 return (NULL);
9858         }
9859 
9860         /*
9861          * Look at path to recover fs_root, ignoring the leading '/'
9862          */
9863         (void) make_pathname4(vp->v_path, &result->fs_root);
9864 
9865         return (result);
9866 }
9867 
9868 char *
9869 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9870 {
9871         fs_locations4 *fsl;
9872         fs_location4 *fs;
9873         char *server, *path, *symbuf;
9874         static char *prefix = "/net/";
9875         int i, size, npaths;
9876         uint_t len;
9877 
9878         /* Get the referral */
9879         if ((fsl = fetch_referral(vp, cr)) == NULL)
9880                 return (NULL);
9881 
9882         /* Deal with only the first location and first server */
9883         fs = &fsl->locations_val[0];
9884         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9885         if (server == NULL) {
9886                 rfs4_free_fs_locations4(fsl);
9887                 kmem_free(fsl, sizeof (fs_locations4));
9888                 return (NULL);
9889         }
9890 
9891         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9892         size = strlen(prefix) + len;
9893         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9894                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9895 
9896         /* Allocate the symlink buffer and fill it */
9897         symbuf = kmem_zalloc(size, KM_SLEEP);
9898         (void) strcat(symbuf, prefix);
9899         (void) strcat(symbuf, server);
9900         kmem_free(server, len);
9901 
9902         npaths = 0;
9903         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9904                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9905                 if (path == NULL)
9906                         continue;
9907                 (void) strcat(symbuf, "/");
9908                 (void) strcat(symbuf, path);
9909                 npaths++;
9910                 kmem_free(path, len);
9911         }
9912 
9913         rfs4_free_fs_locations4(fsl);
9914         kmem_free(fsl, sizeof (fs_locations4));
9915 
9916         if (strsz != NULL)
9917                 *strsz = size;
9918         return (symbuf);
9919 }
9920 
9921 /*
9922  * Check to see if we have a downrev Solaris client, so that we
9923  * can send it a symlink instead of a referral.
9924  */
9925 int
9926 client_is_downrev(struct svc_req *req)
9927 {
9928         struct sockaddr *ca;
9929         rfs4_clntip_t *ci;
9930         bool_t create = FALSE;
9931         int is_downrev;
9932 
9933         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9934         ASSERT(ca);
9935         ci = rfs4_find_clntip(ca, &create);
9936         if (ci == NULL)
9937                 return (0);
9938         is_downrev = ci->ri_no_referrals;
9939         rfs4_dbe_rele(ci->ri_dbe);
9940         return (is_downrev);
9941 }
9942 
9943 /*
9944  * Do the main work of handling HA-NFSv4 Resource Group failover on
9945  * Sun Cluster.
9946  * We need to detect whether any RG admin paths have been added or removed,
9947  * and adjust resources accordingly.
9948  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9949  * order to scale, the list and array of paths need to be held in more
9950  * suitable data structures.
9951  */
9952 static void
9953 hanfsv4_failover(nfs4_srv_t *nsrv4)
9954 {
9955         int i, start_grace, numadded_paths = 0;
9956         char **added_paths = NULL;
9957         rfs4_dss_path_t *dss_path;
9958 
9959         /*
9960          * Note: currently, dss_pathlist cannot be NULL, since
9961          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9962          * make the latter dynamically specified too, the following will
9963          * need to be adjusted.
9964          */
9965 
9966         /*
9967          * First, look for removed paths: RGs that have been failed-over
9968          * away from this node.
9969          * Walk the "currently-serving" dss_pathlist and, for each
9970          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9971          * from nfsd. If not, that RG path has been removed.
9972          *
9973          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9974          * any duplicates.
9975          */
9976         dss_path = nsrv4->dss_pathlist;
9977         do {
9978                 int found = 0;
9979                 char *path = dss_path->path;
9980 
9981                 /* used only for non-HA so may not be removed */
9982                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9983                         dss_path = dss_path->next;
9984                         continue;
9985                 }
9986 
9987                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9988                         int cmpret;
9989                         char *newpath = rfs4_dss_newpaths[i];
9990 
9991                         /*
9992                          * Since nfsd has sorted rfs4_dss_newpaths for us,
9993                          * once the return from strcmp is negative we know
9994                          * we've passed the point where "path" should be,
9995                          * and can stop searching: "path" has been removed.
9996                          */
9997                         cmpret = strcmp(path, newpath);
9998                         if (cmpret < 0)
9999                                 break;
10000                         if (cmpret == 0) {
10001                                 found = 1;
10002                                 break;
10003                         }
10004                 }
10005 
10006                 if (found == 0) {
10007                         unsigned index = dss_path->index;
10008                         rfs4_servinst_t *sip = dss_path->sip;
10009                         rfs4_dss_path_t *path_next = dss_path->next;
10010 
10011                         /*
10012                          * This path has been removed.
10013                          * We must clear out the servinst reference to
10014                          * it, since it's now owned by another
10015                          * node: we should not attempt to touch it.
10016                          */
10017                         ASSERT(dss_path == sip->dss_paths[index]);
10018                         sip->dss_paths[index] = NULL;
10019 
10020                         /* remove from "currently-serving" list, and destroy */
10021                         remque(dss_path);
10022                         /* allow for NUL */
10023                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10024                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10025 
10026                         dss_path = path_next;
10027                 } else {
10028                         /* path was found; not removed */
10029                         dss_path = dss_path->next;
10030                 }
10031         } while (dss_path != nsrv4->dss_pathlist);
10032 
10033         /*
10034          * Now, look for added paths: RGs that have been failed-over
10035          * to this node.
10036          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10037          * for each path, check if it is on the "currently-serving"
10038          * dss_pathlist. If not, that RG path has been added.
10039          *
10040          * Note: we don't do duplicate detection here; nfsd does that for us.
10041          *
10042          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10043          * an upper bound for the size needed for added_paths[numadded_paths].
10044          */
10045 
10046         /* probably more space than we need, but guaranteed to be enough */
10047         if (rfs4_dss_numnewpaths > 0) {
10048                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10049                 added_paths = kmem_zalloc(sz, KM_SLEEP);
10050         }
10051 
10052         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10053         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10054                 int found = 0;
10055                 char *newpath = rfs4_dss_newpaths[i];
10056 
10057                 dss_path = nsrv4->dss_pathlist;
10058                 do {
10059                         char *path = dss_path->path;
10060 
10061                         /* used only for non-HA */
10062                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10063                                 dss_path = dss_path->next;
10064                                 continue;
10065                         }
10066 
10067                         if (strncmp(path, newpath, strlen(path)) == 0) {
10068                                 found = 1;
10069                                 break;
10070                         }
10071 
10072                         dss_path = dss_path->next;
10073                 } while (dss_path != nsrv4->dss_pathlist);
10074 
10075                 if (found == 0) {
10076                         added_paths[numadded_paths] = newpath;
10077                         numadded_paths++;
10078                 }
10079         }
10080 
10081         /* did we find any added paths? */
10082         if (numadded_paths > 0) {
10083 
10084                 /* create a new server instance, and start its grace period */
10085                 start_grace = 1;
10086                 /* CSTYLED */
10087                 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10088 
10089                 /* read in the stable storage state from these paths */
10090                 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10091 
10092                 /*
10093                  * Multiple failovers during a grace period will cause
10094                  * clients of the same resource group to be partitioned
10095                  * into different server instances, with different
10096                  * grace periods.  Since clients of the same resource
10097                  * group must be subject to the same grace period,
10098                  * we need to reset all currently active grace periods.
10099                  */
10100                 rfs4_grace_reset_all(nsrv4);
10101         }
10102 
10103         if (rfs4_dss_numnewpaths > 0)
10104                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10105 }