1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30 
  31 /*
  32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33  * Copyright 2019 Nexenta Systems, Inc.
  34  * Copyright 2019 Nexenta by DDN, Inc.
  35  */
  36 
  37 #include <sys/param.h>
  38 #include <sys/types.h>
  39 #include <sys/systm.h>
  40 #include <sys/cred.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 #include <sys/vnode.h>
  45 #include <sys/uio.h>
  46 #include <sys/errno.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/statvfs.h>
  49 #include <sys/kmem.h>
  50 #include <sys/dirent.h>
  51 #include <sys/cmn_err.h>
  52 #include <sys/debug.h>
  53 #include <sys/systeminfo.h>
  54 #include <sys/flock.h>
  55 #include <sys/pathname.h>
  56 #include <sys/nbmlock.h>
  57 #include <sys/share.h>
  58 #include <sys/atomic.h>
  59 #include <sys/policy.h>
  60 #include <sys/fem.h>
  61 #include <sys/sdt.h>
  62 #include <sys/ddi.h>
  63 #include <sys/zone.h>
  64 
  65 #include <fs/fs_reparse.h>
  66 
  67 #include <rpc/types.h>
  68 #include <rpc/auth.h>
  69 #include <rpc/rpcsec_gss.h>
  70 #include <rpc/svc.h>
  71 
  72 #include <nfs/nfs.h>
  73 #include <nfs/nfssys.h>
  74 #include <nfs/export.h>
  75 #include <nfs/nfs_cmd.h>
  76 #include <nfs/lm.h>
  77 #include <nfs/nfs4.h>
  78 #include <nfs/nfs4_drc.h>
  79 
  80 #include <sys/strsubr.h>
  81 #include <sys/strsun.h>
  82 
  83 #include <inet/common.h>
  84 #include <inet/ip.h>
  85 #include <inet/ip6.h>
  86 
  87 #include <sys/tsol/label.h>
  88 #include <sys/tsol/tndb.h>
  89 
  90 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  92 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  93 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  94 extern struct svc_ops rdma_svc_ops;
  95 extern int nfs_loaned_buffers;
  96 /* End of Tunables */
  97 
  98 static int rdma_setup_read_data4(READ4args *, READ4res *);
  99 
 100 /*
 101  * Used to bump the stateid4.seqid value and show changes in the stateid
 102  */
 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
 104 
 105 /*
 106  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  *      maxcount that isn't large enough to hold the smallest possible
 109  *      XDR encoded dirent.
 110  *
 111  *          sizeof cookie (8 bytes) +
 112  *          sizeof name_len (4 bytes) +
 113  *          sizeof smallest (padded) name (4 bytes) +
 114  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  *          sizeof attrlist4_len (4 bytes) +
 116  *          sizeof next boolean (4 bytes)
 117  *
 118  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  * the smallest possible entry4 (assumes no attrs requested).
 120  *      sizeof nfsstat4 (4 bytes) +
 121  *      sizeof verifier4 (8 bytes) +
 122  *      sizeof entry4list bool (4 bytes) +
 123  *      sizeof entry4   (36 bytes) +
 124  *      sizeof eof bool  (4 bytes)
 125  *
 126  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  *      required for a given name length.  MAXNAMELEN is the maximum
 130  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  *      to hold ., .., and the largest possible solaris dirent64.
 134  */
 135 #define RFS4_MINLEN_ENTRY4 36
 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137 #define RFS4_MINLEN_RDDIR_BUF \
 138         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139 
 140 /*
 141  * It would be better to pad to 4 bytes since that's what XDR would do,
 142  * but the dirents UFS gives us are already padded to 8, so just take
 143  * what we're given.  Dircount is only a hint anyway.  Currently the
 144  * solaris kernel is ASCII only, so there's no point in calling the
 145  * UTF8 functions.
 146  *
 * dirent64: name padded to provide 8 byte struct alignment
 148  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  *
 150  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  *
 152  */
 153 #define DIRENT64_TO_DIRCOUNT(dp) \
 154         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155 
 156 
 157 static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 158 
 159 u_longlong_t    nfs4_srv_caller_id;
 160 uint_t          nfs4_srv_vkey = 0;
 161 
 162 void    rfs4_init_compound_state(struct compound_state *);
 163 
 164 static void     nullfree(caddr_t);
 165 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                     struct compound_state *);
 167 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                     struct compound_state *);
 169 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170                     struct compound_state *);
 171 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172                     struct compound_state *);
 173 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 174                     struct compound_state *);
 175 static void     rfs4_op_create_free(nfs_resop4 *resop);
 176 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 177                     struct svc_req *, struct compound_state *);
 178 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 179                     struct svc_req *, struct compound_state *);
 180 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181                     struct compound_state *);
 182 static void     rfs4_op_getattr_free(nfs_resop4 *);
 183 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 184                     struct compound_state *);
 185 static void     rfs4_op_getfh_free(nfs_resop4 *);
 186 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187                     struct compound_state *);
 188 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189                     struct compound_state *);
 190 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 191                     struct compound_state *);
 192 static void     lock_denied_free(nfs_resop4 *);
 193 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194                     struct compound_state *);
 195 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196                     struct compound_state *);
 197 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198                     struct compound_state *);
 199 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 200                     struct compound_state *);
 201 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 202                     struct svc_req *req, struct compound_state *cs);
 203 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204                     struct compound_state *);
 205 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 206                     struct compound_state *);
 207 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 208                     struct svc_req *, struct compound_state *);
 209 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 210                     struct svc_req *, struct compound_state *);
 211 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212                     struct compound_state *);
 213 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214                     struct compound_state *);
 215 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216                     struct compound_state *);
 217 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 218                     struct compound_state *);
 219 static void     rfs4_op_read_free(nfs_resop4 *);
 220 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 221 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 222                     struct compound_state *);
 223 static void     rfs4_op_readlink_free(nfs_resop4 *);
 224 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 225                     struct svc_req *, struct compound_state *);
 226 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227                     struct compound_state *);
 228 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229                     struct compound_state *);
 230 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                     struct compound_state *);
 232 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                     struct compound_state *);
 234 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                     struct compound_state *);
 236 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237                     struct compound_state *);
 238 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239                     struct compound_state *);
 240 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 241                     struct compound_state *);
 242 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 243                     struct svc_req *, struct compound_state *);
 244 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 245                     struct svc_req *req, struct compound_state *);
 246 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 247                     struct compound_state *);
 248 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 249 
 250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 251                     struct svc_req *);
 252 nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 253 void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 254 
 255 
/*
 * translation table for attrs
 *
 * Instances are operated on by nfs4_ntov_table_init(),
 * nfs4_ntov_table_free() and do_rfs4_set_attrs(), all of which take a
 * pointer to this structure.
 */
struct nfs4_ntov_table {
        /* presumably the array of decoded attribute values -- TODO confirm */
        union nfs4_attr_u *na;
        /* one byte per possible attribute (NFS4_MAXNUM_ATTRS entries) */
        uint8_t amap[NFS4_MAXNUM_ATTRS];
        /* presumably the number of entries in use -- TODO confirm */
        int attrcnt;
        /* NOTE(review): meaning not visible here; verify against users */
        bool_t vfsstat;
};
 265 
 266 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 267 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 268                     struct nfs4_svgetit_arg *sargp);
 269 
 270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 271                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 272                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 273 
 274 static void     hanfsv4_failover(nfs4_srv_t *);
 275 
 276 fem_t           *deleg_rdops;
 277 fem_t           *deleg_wrops;
 278 
/*
 * NFS4 op dispatch table
 *
 * Each entry pairs the handler for one operation with the routine that
 * releases whatever the handler allocated, plus a set of flag bits.
 */

struct rfsv4disp {
        void    (*dis_proc)();          /* proc to call */
        void    (*dis_resfree)();       /* frees space allocated by proc */
        int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
};
 288 
 289 static struct rfsv4disp rfsv4disptab[] = {
 290         /*
 291          * NFS VERSION 4
 292          */
 293 
 294         /* RFS_NULL = 0 */
 295         {rfs4_op_illegal, nullfree, 0},
 296 
 297         /* UNUSED = 1 */
 298         {rfs4_op_illegal, nullfree, 0},
 299 
 300         /* UNUSED = 2 */
 301         {rfs4_op_illegal, nullfree, 0},
 302 
 303         /* OP_ACCESS = 3 */
 304         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 305 
 306         /* OP_CLOSE = 4 */
 307         {rfs4_op_close, nullfree, 0},
 308 
 309         /* OP_COMMIT = 5 */
 310         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 311 
 312         /* OP_CREATE = 6 */
 313         {rfs4_op_create, nullfree, 0},
 314 
 315         /* OP_DELEGPURGE = 7 */
 316         {rfs4_op_delegpurge, nullfree, 0},
 317 
 318         /* OP_DELEGRETURN = 8 */
 319         {rfs4_op_delegreturn, nullfree, 0},
 320 
 321         /* OP_GETATTR = 9 */
 322         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 323 
 324         /* OP_GETFH = 10 */
 325         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 326 
 327         /* OP_LINK = 11 */
 328         {rfs4_op_link, nullfree, 0},
 329 
 330         /* OP_LOCK = 12 */
 331         {rfs4_op_lock, lock_denied_free, 0},
 332 
 333         /* OP_LOCKT = 13 */
 334         {rfs4_op_lockt, lock_denied_free, 0},
 335 
 336         /* OP_LOCKU = 14 */
 337         {rfs4_op_locku, nullfree, 0},
 338 
 339         /* OP_LOOKUP = 15 */
 340         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 341 
 342         /* OP_LOOKUPP = 16 */
 343         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 344 
 345         /* OP_NVERIFY = 17 */
 346         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 347 
 348         /* OP_OPEN = 18 */
 349         {rfs4_op_open, rfs4_free_reply, 0},
 350 
 351         /* OP_OPENATTR = 19 */
 352         {rfs4_op_openattr, nullfree, 0},
 353 
 354         /* OP_OPEN_CONFIRM = 20 */
 355         {rfs4_op_open_confirm, nullfree, 0},
 356 
 357         /* OP_OPEN_DOWNGRADE = 21 */
 358         {rfs4_op_open_downgrade, nullfree, 0},
 359 
        /* OP_PUTFH = 22 */
 361         {rfs4_op_putfh, nullfree, RPC_ALL},
 362 
 363         /* OP_PUTPUBFH = 23 */
 364         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 365 
 366         /* OP_PUTROOTFH = 24 */
 367         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 368 
 369         /* OP_READ = 25 */
 370         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 371 
 372         /* OP_READDIR = 26 */
 373         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 374 
 375         /* OP_READLINK = 27 */
 376         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 377 
 378         /* OP_REMOVE = 28 */
 379         {rfs4_op_remove, nullfree, 0},
 380 
 381         /* OP_RENAME = 29 */
 382         {rfs4_op_rename, nullfree, 0},
 383 
 384         /* OP_RENEW = 30 */
 385         {rfs4_op_renew, nullfree, 0},
 386 
 387         /* OP_RESTOREFH = 31 */
 388         {rfs4_op_restorefh, nullfree, RPC_ALL},
 389 
 390         /* OP_SAVEFH = 32 */
 391         {rfs4_op_savefh, nullfree, RPC_ALL},
 392 
 393         /* OP_SECINFO = 33 */
 394         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 395 
 396         /* OP_SETATTR = 34 */
 397         {rfs4_op_setattr, nullfree, 0},
 398 
 399         /* OP_SETCLIENTID = 35 */
 400         {rfs4_op_setclientid, nullfree, 0},
 401 
 402         /* OP_SETCLIENTID_CONFIRM = 36 */
 403         {rfs4_op_setclientid_confirm, nullfree, 0},
 404 
 405         /* OP_VERIFY = 37 */
 406         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 407 
 408         /* OP_WRITE = 38 */
 409         {rfs4_op_write, nullfree, 0},
 410 
 411         /* OP_RELEASE_LOCKOWNER = 39 */
 412         {rfs4_op_release_lockowner, nullfree, 0},
 413 };
 414 
 415 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 416 
 417 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 418 
 419 #ifdef DEBUG
 420 
 421 int             rfs4_fillone_debug = 0;
 422 int             rfs4_no_stub_access = 1;
 423 int             rfs4_rddir_debug = 0;
 424 
 425 static char    *rfs4_op_string[] = {
 426         "rfs4_op_null",
 427         "rfs4_op_1 unused",
 428         "rfs4_op_2 unused",
 429         "rfs4_op_access",
 430         "rfs4_op_close",
 431         "rfs4_op_commit",
 432         "rfs4_op_create",
 433         "rfs4_op_delegpurge",
 434         "rfs4_op_delegreturn",
 435         "rfs4_op_getattr",
 436         "rfs4_op_getfh",
 437         "rfs4_op_link",
 438         "rfs4_op_lock",
 439         "rfs4_op_lockt",
 440         "rfs4_op_locku",
 441         "rfs4_op_lookup",
 442         "rfs4_op_lookupp",
 443         "rfs4_op_nverify",
 444         "rfs4_op_open",
 445         "rfs4_op_openattr",
 446         "rfs4_op_open_confirm",
 447         "rfs4_op_open_downgrade",
 448         "rfs4_op_putfh",
 449         "rfs4_op_putpubfh",
 450         "rfs4_op_putrootfh",
 451         "rfs4_op_read",
 452         "rfs4_op_readdir",
 453         "rfs4_op_readlink",
 454         "rfs4_op_remove",
 455         "rfs4_op_rename",
 456         "rfs4_op_renew",
 457         "rfs4_op_restorefh",
 458         "rfs4_op_savefh",
 459         "rfs4_op_secinfo",
 460         "rfs4_op_setattr",
 461         "rfs4_op_setclientid",
 462         "rfs4_op_setclient_confirm",
 463         "rfs4_op_verify",
 464         "rfs4_op_write",
 465         "rfs4_op_release_lockowner",
 466         "rfs4_op_illegal"
 467 };
 468 #endif
 469 
 470 void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 471 
 472 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 473 
 474 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 475 
 476 #ifdef  nextdp
 477 #undef nextdp
 478 #endif
 479 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 480 
 481 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 482         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 483         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 484         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 485         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 486         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 487         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 488         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 489         NULL,                   NULL
 490 };
 491 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 492         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 493         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 494         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 495         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 496         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 497         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 498         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 499         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 500         NULL,                   NULL
 501 };
 502 
 503 nfs4_srv_t *
 504 nfs4_get_srv(void)
 505 {
 506         nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
 507         nfs4_srv_t *srv = ng->nfs4_srv;
 508         ASSERT(srv != NULL);
 509         return (srv);
 510 }
 511 
 512 void
 513 rfs4_srv_zone_init(nfs_globals_t *ng)
 514 {
 515         nfs4_srv_t *nsrv4;
 516         timespec32_t verf;
 517 
 518         nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 519 
 520         /*
 521          * The following algorithm attempts to find a unique verifier
 522          * to be used as the write verifier returned from the server
 523          * to the client.  It is important that this verifier change
 524          * whenever the server reboots.  Of secondary importance, it
 525          * is important for the verifier to be unique between two
 526          * different servers.
 527          *
 528          * Thus, an attempt is made to use the system hostid and the
 529          * current time in seconds when the nfssrv kernel module is
 530          * loaded.  It is assumed that an NFS server will not be able
 531          * to boot and then to reboot in less than a second.  If the
 532          * hostid has not been set, then the current high resolution
 533          * time is used.  This will ensure different verifiers each
 534          * time the server reboots and minimize the chances that two
 535          * different servers will have the same verifier.
 536          * XXX - this is broken on LP64 kernels.
 537          */
 538         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 539         if (verf.tv_sec != 0) {
 540                 verf.tv_nsec = gethrestime_sec();
 541         } else {
 542                 timespec_t tverf;
 543 
 544                 gethrestime(&tverf);
 545                 verf.tv_sec = (time_t)tverf.tv_sec;
 546                 verf.tv_nsec = tverf.tv_nsec;
 547         }
 548         nsrv4->write4verf = *(uint64_t *)&verf;
 549 
 550         /* Used to manage create/destroy of server state */
 551         nsrv4->nfs4_server_state = NULL;
 552         nsrv4->nfs4_cur_servinst = NULL;
 553         nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 554         mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 555         mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 556         mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 557         rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 558 
 559         ng->nfs4_srv = nsrv4;
 560 }
 561 
 562 void
 563 rfs4_srv_zone_fini(nfs_globals_t *ng)
 564 {
 565         nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 566 
 567         ng->nfs4_srv = NULL;
 568 
 569         mutex_destroy(&nsrv4->deleg_lock);
 570         mutex_destroy(&nsrv4->state_lock);
 571         mutex_destroy(&nsrv4->servinst_lock);
 572         rw_destroy(&nsrv4->deleg_policy_lock);
 573 
 574         kmem_free(nsrv4, sizeof (*nsrv4));
 575 }
 576 
 577 void
 578 rfs4_srvrinit(void)
 579 {
 580         extern void rfs4_attr_init();
 581 
 582         rfs4_attr_init();
 583 
 584         if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 585                 rfs4_disable_delegation();
 586         } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 587             &deleg_wrops) != 0) {
 588                 rfs4_disable_delegation();
 589                 fem_free(deleg_rdops);
 590         }
 591 
 592         nfs4_srv_caller_id = fs_new_caller_id();
 593         lockt_sysid = lm_alloc_sysidt();
 594         vsd_create(&nfs4_srv_vkey, NULL);
 595         rfs4_state_g_init();
 596 }
 597 
 598 void
 599 rfs4_srvrfini(void)
 600 {
 601         if (lockt_sysid != LM_NOSYSID) {
 602                 lm_free_sysidt(lockt_sysid);
 603                 lockt_sysid = LM_NOSYSID;
 604         }
 605 
 606         rfs4_state_g_fini();
 607 
 608         fem_free(deleg_rdops);
 609         fem_free(deleg_wrops);
 610 }
 611 
 612 void
 613 rfs4_do_server_start(int server_upordown,
 614     int srv_delegation, int cluster_booted)
 615 {
 616         nfs4_srv_t *nsrv4 = nfs4_get_srv();
 617 
 618         /* Is this a warm start? */
 619         if (server_upordown == NFS_SERVER_QUIESCED) {
 620                 cmn_err(CE_NOTE, "nfs4_srv: "
 621                     "server was previously quiesced; "
 622                     "existing NFSv4 state will be re-used");
 623 
 624                 /*
 625                  * HA-NFSv4: this is also the signal
 626                  * that a Resource Group failover has
 627                  * occurred.
 628                  */
 629                 if (cluster_booted)
 630                         hanfsv4_failover(nsrv4);
 631         } else {
 632                 /* Cold start */
 633                 nsrv4->rfs4_start_time = 0;
 634                 rfs4_state_zone_init(nsrv4);
 635                 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 636                     nfs4_drc_hash);
 637 
 638                 /*
 639                  * The nfsd service was started with the -s option
 640                  * we need to pull in any state from the paths indicated.
 641                  */
 642                 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 643                         /* read in the stable storage state from these paths */
 644                         rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 645                             rfs4_dss_newpaths);
 646                 }
 647         }
 648 
 649         /* Check if delegation is to be enabled */
 650         if (srv_delegation != FALSE)
 651                 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 652 }
 653 
 654 void
 655 rfs4_init_compound_state(struct compound_state *cs)
 656 {
 657         bzero(cs, sizeof (*cs));
 658         cs->cont = TRUE;
 659         cs->access = CS_ACCESS_DENIED;
 660         cs->deleg = FALSE;
 661         cs->mandlock = FALSE;
 662         cs->fh.nfs_fh4_val = cs->fhbuf;
 663 }
 664 
 665 void
 666 rfs4_grace_start(rfs4_servinst_t *sip)
 667 {
 668         rw_enter(&sip->rwlock, RW_WRITER);
 669         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 670         sip->grace_period = rfs4_grace_period;
 671         rw_exit(&sip->rwlock);
 672 }
 673 
 674 /*
 675  * returns true if the instance's grace period has never been started
 676  */
 677 int
 678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 679 {
 680         time_t start_time;
 681 
 682         rw_enter(&sip->rwlock, RW_READER);
 683         start_time = sip->start_time;
 684         rw_exit(&sip->rwlock);
 685 
 686         return (start_time == 0);
 687 }
 688 
 689 /*
 690  * Indicates if server instance is within the
 691  * grace period.
 692  */
 693 int
 694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 695 {
 696         time_t grace_expiry;
 697 
 698         rw_enter(&sip->rwlock, RW_READER);
 699         grace_expiry = sip->start_time + sip->grace_period;
 700         rw_exit(&sip->rwlock);
 701 
 702         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 703 }
 704 
 705 int
 706 rfs4_clnt_in_grace(rfs4_client_t *cp)
 707 {
 708         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 709 
 710         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 711 }
 712 
 713 /*
 714  * reset all currently active grace periods
 715  */
 716 void
 717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 718 {
 719         rfs4_servinst_t *sip;
 720 
 721         mutex_enter(&nsrv4->servinst_lock);
 722         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 723                 if (rfs4_servinst_in_grace(sip))
 724                         rfs4_grace_start(sip);
 725         mutex_exit(&nsrv4->servinst_lock);
 726 }
 727 
 728 /*
 729  * start any new instances' grace periods
 730  */
 731 void
 732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 733 {
 734         rfs4_servinst_t *sip;
 735 
 736         mutex_enter(&nsrv4->servinst_lock);
 737         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 738                 if (rfs4_servinst_grace_new(sip))
 739                         rfs4_grace_start(sip);
 740         mutex_exit(&nsrv4->servinst_lock);
 741 }
 742 
 743 static rfs4_dss_path_t *
 744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 745     char *path, unsigned index)
 746 {
 747         size_t len;
 748         rfs4_dss_path_t *dss_path;
 749 
 750         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 751 
 752         /*
 753          * Take a copy of the string, since the original may be overwritten.
 754          * Sadly, no strdup() in the kernel.
 755          */
 756         /* allow for NUL */
 757         len = strlen(path) + 1;
 758         dss_path->path = kmem_alloc(len, KM_SLEEP);
 759         (void) strlcpy(dss_path->path, path, len);
 760 
 761         /* associate with servinst */
 762         dss_path->sip = sip;
 763         dss_path->index = index;
 764 
 765         /*
 766          * Add to list of served paths.
 767          * No locking required, as we're only ever called at startup.
 768          */
 769         if (nsrv4->dss_pathlist == NULL) {
 770                 /* this is the first dss_path_t */
 771 
 772                 /* needed for insque/remque */
 773                 dss_path->next = dss_path->prev = dss_path;
 774 
 775                 nsrv4->dss_pathlist = dss_path;
 776         } else {
 777                 insque(dss_path, nsrv4->dss_pathlist);
 778         }
 779 
 780         return (dss_path);
 781 }
 782 
 783 /*
 784  * Create a new server instance, and make it the currently active instance.
 785  * Note that starting the grace period too early will reduce the clients'
 786  * recovery window.
 787  */
 788 void
 789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 790     int dss_npaths, char **dss_paths)
 791 {
 792         unsigned i;
 793         rfs4_servinst_t *sip;
 794         rfs4_oldstate_t *oldstate;
 795 
 796         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 797         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 798 
 799         sip->start_time = (time_t)0;
 800         sip->grace_period = (time_t)0;
 801         sip->next = NULL;
 802         sip->prev = NULL;
 803 
 804         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 805         /*
 806          * This initial dummy entry is required to setup for insque/remque.
 807          * It must be skipped over whenever the list is traversed.
 808          */
 809         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 810         /* insque/remque require initial list entry to be self-terminated */
 811         oldstate->next = oldstate;
 812         oldstate->prev = oldstate;
 813         sip->oldstate = oldstate;
 814 
 815 
 816         sip->dss_npaths = dss_npaths;
 817         sip->dss_paths = kmem_alloc(dss_npaths *
 818             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 819 
 820         for (i = 0; i < dss_npaths; i++) {
 821                 sip->dss_paths[i] =
 822                     rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 823         }
 824 
 825         mutex_enter(&nsrv4->servinst_lock);
 826         if (nsrv4->nfs4_cur_servinst != NULL) {
 827                 /* add to linked list */
 828                 sip->prev = nsrv4->nfs4_cur_servinst;
 829                 nsrv4->nfs4_cur_servinst->next = sip;
 830         }
 831         if (start_grace)
 832                 rfs4_grace_start(sip);
 833         /* make the new instance "current" */
 834         nsrv4->nfs4_cur_servinst = sip;
 835 
 836         mutex_exit(&nsrv4->servinst_lock);
 837 }
 838 
 839 /*
 840  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 841  * all instances directly.
 842  */
 843 void
 844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 845 {
 846         rfs4_servinst_t *sip, *prev, *current;
 847 #ifdef DEBUG
 848         int n = 0;
 849 #endif
 850 
 851         mutex_enter(&nsrv4->servinst_lock);
 852         ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 853         current = nsrv4->nfs4_cur_servinst;
 854         nsrv4->nfs4_cur_servinst = NULL;
 855         for (sip = current; sip != NULL; sip = prev) {
 856                 prev = sip->prev;
 857                 rw_destroy(&sip->rwlock);
 858                 if (sip->oldstate)
 859                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 860                 if (sip->dss_paths) {
 861                         int i = sip->dss_npaths;
 862 
 863                         while (i > 0) {
 864                                 i--;
 865                                 if (sip->dss_paths[i] != NULL) {
 866                                         char *path = sip->dss_paths[i]->path;
 867 
 868                                         if (path != NULL) {
 869                                                 kmem_free(path,
 870                                                     strlen(path) + 1);
 871                                         }
 872                                         kmem_free(sip->dss_paths[i],
 873                                             sizeof (rfs4_dss_path_t));
 874                                 }
 875                         }
 876                         kmem_free(sip->dss_paths,
 877                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 878                 }
 879                 kmem_free(sip, sizeof (rfs4_servinst_t));
 880 #ifdef DEBUG
 881                 n++;
 882 #endif
 883         }
 884         mutex_exit(&nsrv4->servinst_lock);
 885 }
 886 
 887 /*
 888  * Assign the current server instance to a client_t.
 889  * Should be called with cp->rc_dbe held.
 890  */
 891 void
 892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 893     rfs4_servinst_t *sip)
 894 {
 895         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 896 
 897         /*
 898          * The lock ensures that if the current instance is in the process
 899          * of changing, we will see the new one.
 900          */
 901         mutex_enter(&nsrv4->servinst_lock);
 902         cp->rc_server_instance = sip;
 903         mutex_exit(&nsrv4->servinst_lock);
 904 }
 905 
 906 rfs4_servinst_t *
 907 rfs4_servinst(rfs4_client_t *cp)
 908 {
 909         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 910 
 911         return (cp->rc_server_instance);
 912 }
 913 
 914 /* ARGSUSED */
 915 static void
 916 nullfree(caddr_t resop)
 917 {
 918 }
 919 
 920 /*
 921  * This is a fall-through for invalid or not implemented (yet) ops
 922  */
 923 /* ARGSUSED */
 924 static void
 925 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 926     struct compound_state *cs)
 927 {
 928         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 929 }
 930 
 931 /*
 932  * Check if the security flavor, nfsnum, is in the flavor_list.
 933  */
 934 bool_t
 935 in_flavor_list(int nfsnum, int *flavor_list, int count)
 936 {
 937         int i;
 938 
 939         for (i = 0; i < count; i++) {
 940                 if (nfsnum == flavor_list[i])
 941                         return (TRUE);
 942         }
 943         return (FALSE);
 944 }
 945 
 946 /*
 947  * Used by rfs4_op_secinfo to get the security information from the
 948  * export structure associated with the component.
 949  */
 950 /* ARGSUSED */
 951 static nfsstat4
 952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 953 {
 954         int error, different_export = 0;
 955         vnode_t *dvp, *vp;
 956         struct exportinfo *exi;
 957         fid_t fid;
 958         uint_t count, i;
 959         secinfo4 *resok_val;
 960         struct secinfo *secp;
 961         seconfig_t *si;
 962         bool_t did_traverse = FALSE;
 963         int dotdot, walk;
 964         nfs_export_t *ne = nfs_get_export();
 965 
 966         dvp = cs->vp;
 967         exi = cs->exi;
 968         ASSERT(exi != NULL);
 969         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 970 
 971         /*
 972          * If dotdotting, then need to check whether it's above the
 973          * root of a filesystem, or above an export point.
 974          */
 975         if (dotdot) {
 976                 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
 977                 /*
 978                  * If dotdotting at the root of a filesystem, then
 979                  * need to traverse back to the mounted-on filesystem
 980                  * and do the dotdot lookup there.
 981                  */
 982                 if ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp)) {
 983 
 984                         /*
 985                          * If at the system root, then can
 986                          * go up no further.
 987                          */
 988                         if (VN_CMP(dvp, ZONE_ROOTVP()))
 989                                 return (puterrno4(ENOENT));
 990 
 991                         /*
 992                          * Traverse back to the mounted-on filesystem
 993                          */
 994                         dvp = untraverse(ne, dvp);
 995 
 996                         /*
 997                          * Set the different_export flag so we remember
 998                          * to pick up a new exportinfo entry for
 999                          * this new filesystem.
1000                          */
1001                         different_export = 1;
1002                 } else {
1003 
1004                         /*
1005                          * If dotdotting above an export point then set
1006                          * the different_export to get new export info.
1007                          */
1008                         different_export = nfs_exported(exi, dvp);
1009                 }
1010         }
1011 
1012         /*
1013          * Get the vnode for the component "nm".
1014          */
1015         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1016             NULL, NULL, NULL);
1017         if (error)
1018                 return (puterrno4(error));
1019 
1020         /*
1021          * If the vnode is in a pseudo filesystem, or if the security flavor
1022          * used in the request is valid but not an explicitly shared flavor,
1023          * or the access bit indicates that this is a limited access,
1024          * check whether this vnode is visible.
1025          */
1026         if (!different_export &&
1027             (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1028             cs->access & CS_ACCESS_LIMITED)) {
1029                 if (! nfs_visible(exi, vp, &different_export)) {
1030                         VN_RELE(vp);
1031                         return (puterrno4(ENOENT));
1032                 }
1033         }
1034 
1035         /*
1036          * If it's a mountpoint, then traverse it.
1037          */
1038         if (vn_ismntpt(vp)) {
1039                 if ((error = traverse(&vp)) != 0) {
1040                         VN_RELE(vp);
1041                         return (puterrno4(error));
1042                 }
1043                 /* remember that we had to traverse mountpoint */
1044                 did_traverse = TRUE;
1045                 different_export = 1;
1046         } else if (vp->v_vfsp != dvp->v_vfsp) {
1047                 /*
1048                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1049                  * then vp is probably an LOFS object.  We don't need the
1050                  * realvp, we just need to know that we might have crossed
1051                  * a server fs boundary and need to call checkexport4.
1052                  * (LOFS lookup hides server fs mountpoints, and actually calls
1053                  * traverse)
1054                  */
1055                 different_export = 1;
1056         }
1057 
1058         /*
1059          * Get the export information for it.
1060          */
1061         if (different_export) {
1062 
1063                 bzero(&fid, sizeof (fid));
1064                 fid.fid_len = MAXFIDSZ;
1065                 error = vop_fid_pseudo(vp, &fid);
1066                 if (error) {
1067                         VN_RELE(vp);
1068                         return (puterrno4(error));
1069                 }
1070 
1071                 /* We'll need to reassign "exi". */
1072                 if (dotdot)
1073                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1074                 else
1075                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1076 
1077                 if (exi == NULL) {
1078                         if (did_traverse == TRUE) {
1079                                 /*
1080                                  * If this vnode is a mounted-on vnode,
1081                                  * but the mounted-on file system is not
1082                                  * exported, send back the secinfo for
1083                                  * the exported node that the mounted-on
1084                                  * vnode lives in.
1085                                  */
1086                                 exi = cs->exi;
1087                         } else {
1088                                 VN_RELE(vp);
1089                                 return (puterrno4(EACCES));
1090                         }
1091                 }
1092         }
1093         ASSERT(exi != NULL);
1094 
1095 
1096         /*
1097          * Create the secinfo result based on the security information
1098          * from the exportinfo structure (exi).
1099          *
1100          * Return all flavors for a pseudo node.
1101          * For a real export node, return the flavor that the client
1102          * has access with.
1103          */
1104         ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1105         if (PSEUDO(exi)) {
1106                 count = exi->exi_export.ex_seccnt; /* total sec count */
1107                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1108                 secp = exi->exi_export.ex_secinfo;
1109 
1110                 for (i = 0; i < count; i++) {
1111                         si = &secp[i].s_secinfo;
1112                         resok_val[i].flavor = si->sc_rpcnum;
1113                         if (resok_val[i].flavor == RPCSEC_GSS) {
1114                                 rpcsec_gss_info *info;
1115 
1116                                 info = &resok_val[i].flavor_info;
1117                                 info->qop = si->sc_qop;
1118                                 info->service = (rpc_gss_svc_t)si->sc_service;
1119 
1120                                 /* get oid opaque data */
1121                                 info->oid.sec_oid4_len =
1122                                     si->sc_gss_mech_type->length;
1123                                 info->oid.sec_oid4_val = kmem_alloc(
1124                                     si->sc_gss_mech_type->length, KM_SLEEP);
1125                                 bcopy(
1126                                     si->sc_gss_mech_type->elements,
1127                                     info->oid.sec_oid4_val,
1128                                     info->oid.sec_oid4_len);
1129                         }
1130                 }
1131                 resp->SECINFO4resok_len = count;
1132                 resp->SECINFO4resok_val = resok_val;
1133         } else {
1134                 int ret_cnt = 0, k = 0;
1135                 int *flavor_list;
1136 
1137                 count = exi->exi_export.ex_seccnt; /* total sec count */
1138                 secp = exi->exi_export.ex_secinfo;
1139 
1140                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1141                 /* find out which flavors to return */
1142                 for (i = 0; i < count; i ++) {
1143                         int access, flavor, perm;
1144 
1145                         flavor = secp[i].s_secinfo.sc_nfsnum;
1146                         perm = secp[i].s_flags;
1147 
1148                         access = nfsauth4_secinfo_access(exi, cs->req,
1149                             flavor, perm, cs->basecr);
1150 
1151                         if (! (access & NFSAUTH_DENIED) &&
1152                             ! (access & NFSAUTH_WRONGSEC)) {
1153                                 flavor_list[ret_cnt] = flavor;
1154                                 ret_cnt++;
1155                         }
1156                 }
1157 
1158                 /* Create the returning SECINFO value */
1159                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1160 
1161                 for (i = 0; i < count; i++) {
1162                         /*
1163                          * If the flavor is in the flavor list,
1164                          * fill in resok_val.
1165                          */
1166                         si = &secp[i].s_secinfo;
1167                         if (in_flavor_list(si->sc_nfsnum,
1168                             flavor_list, ret_cnt)) {
1169                                 resok_val[k].flavor = si->sc_rpcnum;
1170                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1171                                         rpcsec_gss_info *info;
1172 
1173                                         info = &resok_val[k].flavor_info;
1174                                         info->qop = si->sc_qop;
1175                                         info->service = (rpc_gss_svc_t)
1176                                             si->sc_service;
1177 
1178                                         /* get oid opaque data */
1179                                         info->oid.sec_oid4_len =
1180                                             si->sc_gss_mech_type->length;
1181                                         info->oid.sec_oid4_val = kmem_alloc(
1182                                             si->sc_gss_mech_type->length,
1183                                             KM_SLEEP);
1184                                         bcopy(si->sc_gss_mech_type->elements,
1185                                             info->oid.sec_oid4_val,
1186                                             info->oid.sec_oid4_len);
1187                                 }
1188                                 k++;
1189                         }
1190                         if (k >= ret_cnt)
1191                                 break;
1192                 }
1193                 resp->SECINFO4resok_len = ret_cnt;
1194                 resp->SECINFO4resok_val = resok_val;
1195                 kmem_free(flavor_list, count * sizeof (int));
1196         }
1197 
1198         VN_RELE(vp);
1199         return (NFS4_OK);
1200 }
1201 
1202 /*
1203  * SECINFO (Operation 33): Obtain required security information on
1204  * the component name in the format of (security-mechanism-oid, qop, service)
1205  * triplets.
1206  */
1207 /* ARGSUSED */
1208 static void
1209 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1210     struct compound_state *cs)
1211 {
1212         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1213         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1214         utf8string *utfnm = &args->name;
1215         uint_t len;
1216         char *nm;
1217         struct sockaddr *ca;
1218         char *name = NULL;
1219         nfsstat4 status = NFS4_OK;
1220 
1221         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1222             SECINFO4args *, args);
1223 
1224         /*
1225          * Current file handle (cfh) should have been set before getting
1226          * into this function. If not, return error.
1227          */
1228         if (cs->vp == NULL) {
1229                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1230                 goto out;
1231         }
1232 
1233         if (cs->vp->v_type != VDIR) {
1234                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1235                 goto out;
1236         }
1237 
1238         /*
1239          * Verify the component name. If failed, error out, but
1240          * do not error out if the component name is a "..".
1241          * SECINFO will return its parents secinfo data for SECINFO "..".
1242          */
1243         status = utf8_dir_verify(utfnm);
1244         if (status != NFS4_OK) {
1245                 if (utfnm->utf8string_len != 2 ||
1246                     utfnm->utf8string_val[0] != '.' ||
1247                     utfnm->utf8string_val[1] != '.') {
1248                         *cs->statusp = resp->status = status;
1249                         goto out;
1250                 }
1251         }
1252 
1253         nm = utf8_to_str(utfnm, &len, NULL);
1254         if (nm == NULL) {
1255                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1256                 goto out;
1257         }
1258 
1259         if (len > MAXNAMELEN) {
1260                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1261                 kmem_free(nm, len);
1262                 goto out;
1263         }
1264 
1265         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1266         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1267             MAXPATHLEN  + 1);
1268 
1269         if (name == NULL) {
1270                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1271                 kmem_free(nm, len);
1272                 goto out;
1273         }
1274 
1275 
1276         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1277 
1278         if (name != nm)
1279                 kmem_free(name, MAXPATHLEN + 1);
1280         kmem_free(nm, len);
1281 
1282 out:
1283         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1284             SECINFO4res *, resp);
1285 }
1286 
1287 /*
1288  * Free SECINFO result.
1289  */
1290 /* ARGSUSED */
1291 static void
1292 rfs4_op_secinfo_free(nfs_resop4 *resop)
1293 {
1294         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1295         int count, i;
1296         secinfo4 *resok_val;
1297 
1298         /* If this is not an Ok result, nothing to free. */
1299         if (resp->status != NFS4_OK) {
1300                 return;
1301         }
1302 
1303         count = resp->SECINFO4resok_len;
1304         resok_val = resp->SECINFO4resok_val;
1305 
1306         for (i = 0; i < count; i++) {
1307                 if (resok_val[i].flavor == RPCSEC_GSS) {
1308                         rpcsec_gss_info *info;
1309 
1310                         info = &resok_val[i].flavor_info;
1311                         kmem_free(info->oid.sec_oid4_val,
1312                             info->oid.sec_oid4_len);
1313                 }
1314         }
1315         kmem_free(resok_val, count * sizeof (secinfo4));
1316         resp->SECINFO4resok_len = 0;
1317         resp->SECINFO4resok_val = NULL;
1318 }
1319 
1320 /* ARGSUSED */
1321 static void
1322 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1323     struct compound_state *cs)
1324 {
1325         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1326         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1327         int error;
1328         vnode_t *vp;
1329         struct vattr va;
1330         int checkwriteperm;
1331         cred_t *cr = cs->cr;
1332         bslabel_t *clabel, *slabel;
1333         ts_label_t *tslabel;
1334         boolean_t admin_low_client;
1335 
1336         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1337             ACCESS4args *, args);
1338 
1339 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1340         if (cs->access == CS_ACCESS_DENIED) {
1341                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1342                 goto out;
1343         }
1344 #endif
1345         if (cs->vp == NULL) {
1346                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1347                 goto out;
1348         }
1349 
1350         ASSERT(cr != NULL);
1351 
1352         vp = cs->vp;
1353 
1354         /*
1355          * If the file system is exported read only, it is not appropriate
1356          * to check write permissions for regular files and directories.
1357          * Special files are interpreted by the client, so the underlying
1358          * permissions are sent back to the client for interpretation.
1359          */
1360         if (rdonly4(req, cs) &&
1361             (vp->v_type == VREG || vp->v_type == VDIR))
1362                 checkwriteperm = 0;
1363         else
1364                 checkwriteperm = 1;
1365 
1366         /*
1367          * XXX
1368          * We need the mode so that we can correctly determine access
1369          * permissions relative to a mandatory lock file.  Access to
1370          * mandatory lock files is denied on the server, so it might
1371          * as well be reflected to the server during the open.
1372          */
1373         va.va_mask = AT_MODE;
1374         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1375         if (error) {
1376                 *cs->statusp = resp->status = puterrno4(error);
1377                 goto out;
1378         }
1379         resp->access = 0;
1380         resp->supported = 0;
1381 
1382         if (is_system_labeled()) {
1383                 ASSERT(req->rq_label != NULL);
1384                 clabel = req->rq_label;
1385                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1386                     "got client label from request(1)",
1387                     struct svc_req *, req);
1388                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1389                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1390                                 *cs->statusp = resp->status = puterrno4(EACCES);
1391                                 goto out;
1392                         }
1393                         slabel = label2bslabel(tslabel);
1394                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1395                             char *, "got server label(1) for vp(2)",
1396                             bslabel_t *, slabel, vnode_t *, vp);
1397 
1398                         admin_low_client = B_FALSE;
1399                 } else
1400                         admin_low_client = B_TRUE;
1401         }
1402 
1403         if (args->access & ACCESS4_READ) {
1404                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1405                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1406                     (!is_system_labeled() || admin_low_client ||
1407                     bldominates(clabel, slabel)))
1408                         resp->access |= ACCESS4_READ;
1409                 resp->supported |= ACCESS4_READ;
1410         }
1411         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1412                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1413                 if (!error && (!is_system_labeled() || admin_low_client ||
1414                     bldominates(clabel, slabel)))
1415                         resp->access |= ACCESS4_LOOKUP;
1416                 resp->supported |= ACCESS4_LOOKUP;
1417         }
1418         if (checkwriteperm &&
1419             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1420                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1421                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1422                     (!is_system_labeled() || admin_low_client ||
1423                     blequal(clabel, slabel)))
1424                         resp->access |=
1425                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1426                 resp->supported |=
1427                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1428         }
1429 
1430         if (checkwriteperm &&
1431             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1432                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1433                 if (!error && (!is_system_labeled() || admin_low_client ||
1434                     blequal(clabel, slabel)))
1435                         resp->access |= ACCESS4_DELETE;
1436                 resp->supported |= ACCESS4_DELETE;
1437         }
1438         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1439                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1440                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1441                     (!is_system_labeled() || admin_low_client ||
1442                     bldominates(clabel, slabel)))
1443                         resp->access |= ACCESS4_EXECUTE;
1444                 resp->supported |= ACCESS4_EXECUTE;
1445         }
1446 
1447         if (is_system_labeled() && !admin_low_client)
1448                 label_rele(tslabel);
1449 
1450         *cs->statusp = resp->status = NFS4_OK;
1451 out:
1452         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1453             ACCESS4res *, resp);
1454 }
1455 
1456 /* ARGSUSED */
1457 static void
1458 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1459     struct compound_state *cs)
1460 {
1461         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1462         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1463         int error;
1464         vnode_t *vp = cs->vp;
1465         cred_t *cr = cs->cr;
1466         vattr_t va;
1467         nfs4_srv_t *nsrv4;
1468 
1469         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1470             COMMIT4args *, args);
1471 
1472         if (vp == NULL) {
1473                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1474                 goto out;
1475         }
1476         if (cs->access == CS_ACCESS_DENIED) {
1477                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1478                 goto out;
1479         }
1480 
1481         if (args->offset + args->count < args->offset) {
1482                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1483                 goto out;
1484         }
1485 
1486         va.va_mask = AT_UID;
1487         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1488 
1489         /*
1490          * If we can't get the attributes, then we can't do the
1491          * right access checking.  So, we'll fail the request.
1492          */
1493         if (error) {
1494                 *cs->statusp = resp->status = puterrno4(error);
1495                 goto out;
1496         }
1497         if (rdonly4(req, cs)) {
1498                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1499                 goto out;
1500         }
1501 
1502         if (vp->v_type != VREG) {
1503                 if (vp->v_type == VDIR)
1504                         resp->status = NFS4ERR_ISDIR;
1505                 else
1506                         resp->status = NFS4ERR_INVAL;
1507                 *cs->statusp = resp->status;
1508                 goto out;
1509         }
1510 
1511         if (crgetuid(cr) != va.va_uid &&
1512             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1513                 *cs->statusp = resp->status = puterrno4(error);
1514                 goto out;
1515         }
1516 
1517         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1518 
1519         if (error) {
1520                 *cs->statusp = resp->status = puterrno4(error);
1521                 goto out;
1522         }
1523 
1524         nsrv4 = nfs4_get_srv();
1525         *cs->statusp = resp->status = NFS4_OK;
1526         resp->writeverf = nsrv4->write4verf;
1527 out:
1528         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1529             COMMIT4res *, resp);
1530 }
1531 
1532 /*
1533  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1534  * was completed. It does the nfsv4 create for special files.
1535  */
1536 /* ARGSUSED */
1537 static vnode_t *
1538 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1539     struct compound_state *cs, vattr_t *vap, char *nm)
1540 {
1541         int error;
1542         cred_t *cr = cs->cr;
1543         vnode_t *dvp = cs->vp;
1544         vnode_t *vp = NULL;
1545         int mode;
1546         enum vcexcl excl;
1547 
1548         switch (args->type) {
1549         case NF4CHR:
1550         case NF4BLK:
1551                 if (secpolicy_sys_devices(cr) != 0) {
1552                         *cs->statusp = resp->status = NFS4ERR_PERM;
1553                         return (NULL);
1554                 }
1555                 if (args->type == NF4CHR)
1556                         vap->va_type = VCHR;
1557                 else
1558                         vap->va_type = VBLK;
1559                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1560                     args->ftype4_u.devdata.specdata2);
1561                 vap->va_mask |= AT_RDEV;
1562                 break;
1563         case NF4SOCK:
1564                 vap->va_type = VSOCK;
1565                 break;
1566         case NF4FIFO:
1567                 vap->va_type = VFIFO;
1568                 break;
1569         default:
1570                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1571                 return (NULL);
1572         }
1573 
1574         /*
1575          * Must specify the mode.
1576          */
1577         if (!(vap->va_mask & AT_MODE)) {
1578                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1579                 return (NULL);
1580         }
1581 
1582         excl = EXCL;
1583 
1584         mode = 0;
1585 
1586         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1587         if (error) {
1588                 *cs->statusp = resp->status = puterrno4(error);
1589                 return (NULL);
1590         }
1591         return (vp);
1592 }
1593 
1594 /*
1595  * nfsv4 create is used to create non-regular files. For regular files,
1596  * use nfsv4 open.
1597  */
1598 /* ARGSUSED */
1599 static void
1600 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1601     struct compound_state *cs)
1602 {
1603         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1604         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1605         int error;
1606         struct vattr bva, iva, iva2, ava, *vap;
1607         cred_t *cr = cs->cr;
1608         vnode_t *dvp = cs->vp;
1609         vnode_t *vp = NULL;
1610         vnode_t *realvp;
1611         char *nm, *lnm;
1612         uint_t len, llen;
1613         int syncval = 0;
1614         struct nfs4_svgetit_arg sarg;
1615         struct nfs4_ntov_table ntov;
1616         struct statvfs64 sb;
1617         nfsstat4 status;
1618         struct sockaddr *ca;
1619         char *name = NULL;
1620         char *lname = NULL;
1621 
1622         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1623             CREATE4args *, args);
1624 
1625         resp->attrset = 0;
1626 
1627         if (dvp == NULL) {
1628                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1629                 goto out;
1630         }
1631 
1632         /*
1633          * If there is an unshared filesystem mounted on this vnode,
1634          * do not allow to create an object in this directory.
1635          */
1636         if (vn_ismntpt(dvp)) {
1637                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1638                 goto out;
1639         }
1640 
1641         /* Verify that type is correct */
1642         switch (args->type) {
1643         case NF4LNK:
1644         case NF4BLK:
1645         case NF4CHR:
1646         case NF4SOCK:
1647         case NF4FIFO:
1648         case NF4DIR:
1649                 break;
1650         default:
1651                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1652                 goto out;
1653         };
1654 
1655         if (cs->access == CS_ACCESS_DENIED) {
1656                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1657                 goto out;
1658         }
1659         if (dvp->v_type != VDIR) {
1660                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1661                 goto out;
1662         }
1663         status = utf8_dir_verify(&args->objname);
1664         if (status != NFS4_OK) {
1665                 *cs->statusp = resp->status = status;
1666                 goto out;
1667         }
1668 
1669         if (rdonly4(req, cs)) {
1670                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1671                 goto out;
1672         }
1673 
1674         /*
1675          * Name of newly created object
1676          */
1677         nm = utf8_to_fn(&args->objname, &len, NULL);
1678         if (nm == NULL) {
1679                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1680                 goto out;
1681         }
1682 
1683         if (len > MAXNAMELEN) {
1684                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1685                 kmem_free(nm, len);
1686                 goto out;
1687         }
1688 
1689         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1690         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1691             MAXPATHLEN  + 1);
1692 
1693         if (name == NULL) {
1694                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1695                 kmem_free(nm, len);
1696                 goto out;
1697         }
1698 
1699         resp->attrset = 0;
1700 
1701         sarg.sbp = &sb;
1702         sarg.is_referral = B_FALSE;
1703         nfs4_ntov_table_init(&ntov);
1704 
1705         status = do_rfs4_set_attrs(&resp->attrset,
1706             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1707 
1708         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1709                 status = NFS4ERR_INVAL;
1710 
1711         if (status != NFS4_OK) {
1712                 *cs->statusp = resp->status = status;
1713                 if (name != nm)
1714                         kmem_free(name, MAXPATHLEN + 1);
1715                 kmem_free(nm, len);
1716                 nfs4_ntov_table_free(&ntov, &sarg);
1717                 resp->attrset = 0;
1718                 goto out;
1719         }
1720 
1721         /* Get "before" change value */
1722         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1723         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1724         if (error) {
1725                 *cs->statusp = resp->status = puterrno4(error);
1726                 if (name != nm)
1727                         kmem_free(name, MAXPATHLEN + 1);
1728                 kmem_free(nm, len);
1729                 nfs4_ntov_table_free(&ntov, &sarg);
1730                 resp->attrset = 0;
1731                 goto out;
1732         }
1733         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1734 
1735         vap = sarg.vap;
1736 
1737         /*
1738          * Set the default initial values for attributes when the parent
1739          * directory does not have the VSUID/VSGID bit set and they have
1740          * not been specified in createattrs.
1741          */
1742         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1743                 vap->va_uid = crgetuid(cr);
1744                 vap->va_mask |= AT_UID;
1745         }
1746         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1747                 vap->va_gid = crgetgid(cr);
1748                 vap->va_mask |= AT_GID;
1749         }
1750 
1751         vap->va_mask |= AT_TYPE;
1752         switch (args->type) {
1753         case NF4DIR:
1754                 vap->va_type = VDIR;
1755                 if ((vap->va_mask & AT_MODE) == 0) {
1756                         vap->va_mode = 0700; /* default: owner rwx only */
1757                         vap->va_mask |= AT_MODE;
1758                 }
1759                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1760                 if (error)
1761                         break;
1762 
1763                 /*
1764                  * Get the initial "after" sequence number, if it fails,
1765                  * set to zero
1766                  */
1767                 iva.va_mask = AT_SEQ;
1768                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1769                         iva.va_seq = 0;
1770                 break;
1771         case NF4LNK:
1772                 vap->va_type = VLNK;
1773                 if ((vap->va_mask & AT_MODE) == 0) {
1774                         vap->va_mode = 0700; /* default: owner rwx only */
1775                         vap->va_mask |= AT_MODE;
1776                 }
1777 
1778                 /*
1779                  * symlink names must be treated as data
1780                  */
1781                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1782                     &llen, NULL);
1783 
1784                 if (lnm == NULL) {
1785                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1786                         if (name != nm)
1787                                 kmem_free(name, MAXPATHLEN + 1);
1788                         kmem_free(nm, len);
1789                         nfs4_ntov_table_free(&ntov, &sarg);
1790                         resp->attrset = 0;
1791                         goto out;
1792                 }
1793 
1794                 if (llen > MAXPATHLEN) {
1795                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1796                         if (name != nm)
1797                                 kmem_free(name, MAXPATHLEN + 1);
1798                         kmem_free(nm, len);
1799                         kmem_free(lnm, llen);
1800                         nfs4_ntov_table_free(&ntov, &sarg);
1801                         resp->attrset = 0;
1802                         goto out;
1803                 }
1804 
1805                 lname = nfscmd_convname(ca, cs->exi, lnm,
1806                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1807 
1808                 if (lname == NULL) {
1809                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1810                         if (name != nm)
1811                                 kmem_free(name, MAXPATHLEN + 1);
1812                         kmem_free(nm, len);
1813                         kmem_free(lnm, llen);
1814                         nfs4_ntov_table_free(&ntov, &sarg);
1815                         resp->attrset = 0;
1816                         goto out;
1817                 }
1818 
1819                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1820                 if (lname != lnm)
1821                         kmem_free(lname, MAXPATHLEN + 1);
1822                 kmem_free(lnm, llen);
1823                 if (error)
1824                         break;
1825 
1826                 /*
1827                  * Get the initial "after" sequence number, if it fails,
1828                  * set to zero
1829                  */
1830                 iva.va_mask = AT_SEQ;
1831                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1832                         iva.va_seq = 0;
1833 
1834                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1835                     NULL, NULL, NULL);
1836                 if (error)
1837                         break;
1838 
1839                 /*
1840                  * va_seq is not safe over VOP calls, check it again
1841                  * if it has changed zero out iva to force atomic = FALSE.
1842                  */
1843                 iva2.va_mask = AT_SEQ;
1844                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1845                     iva2.va_seq != iva.va_seq)
1846                         iva.va_seq = 0;
1847                 break;
1848         default:
1849                 /*
1850                  * probably a special file.
1851                  */
1852                 if ((vap->va_mask & AT_MODE) == 0) {
1853                         vap->va_mode = 0600; /* default: owner rw only */
1854                         vap->va_mask |= AT_MODE;
1855                 }
1856                 syncval = FNODSYNC;
1857                 /*
1858                  * We know this will only generate one VOP call
1859                  */
1860                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1861 
1862                 if (vp == NULL) {
1863                         if (name != nm)
1864                                 kmem_free(name, MAXPATHLEN + 1);
1865                         kmem_free(nm, len);
1866                         nfs4_ntov_table_free(&ntov, &sarg);
1867                         resp->attrset = 0;
1868                         goto out;
1869                 }
1870 
1871                 /*
1872                  * Get the initial "after" sequence number, if it fails,
1873                  * set to zero
1874                  */
1875                 iva.va_mask = AT_SEQ;
1876                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1877                         iva.va_seq = 0;
1878 
1879                 break;
1880         }
1881         if (name != nm)
1882                 kmem_free(name, MAXPATHLEN + 1);
1883         kmem_free(nm, len);
1884 
1885         if (error) {
1886                 *cs->statusp = resp->status = puterrno4(error);
1887         }
1888 
1889         /*
1890          * Force modified data and metadata out to stable storage.
1891          */
1892         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1893 
1894         if (resp->status != NFS4_OK) {
1895                 if (vp != NULL)
1896                         VN_RELE(vp);
1897                 nfs4_ntov_table_free(&ntov, &sarg);
1898                 resp->attrset = 0;
1899                 goto out;
1900         }
1901 
1902         /*
1903          * Finish setup of cinfo response, "before" value already set.
1904          * Get "after" change value, if it fails, simply return the
1905          * before value.
1906          */
1907         ava.va_mask = AT_CTIME|AT_SEQ;
1908         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1909                 ava.va_ctime = bva.va_ctime;
1910                 ava.va_seq = 0;
1911         }
1912         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1913 
1914         /*
1915          * True verification that object was created with correct
1916          * attrs is impossible.  The attrs could have been changed
1917          * immediately after object creation.  If attributes did
1918          * not verify, the only recourse for the server is to
1919          * destroy the object.  Maybe if some attrs (like gid)
1920          * are set incorrectly, the object should be destroyed;
1921          * however, seems bad as a default policy.  Do we really
1922          * want to destroy an object over one of the times not
1923          * verifying correctly?  For these reasons, the server
1924          * currently sets bits in attrset for createattrs
1925          * that were set; however, no verification is done.
1926          *
1927          * vmask_to_nmask accounts for vattr bits set on create
1928          *      [do_rfs4_set_attrs() only sets resp bits for
1929          *       non-vattr/vfs bits.]
1930          * Mask off any bits set by default so as not to return
1931          * more attrset bits than were requested in createattrs
1932          */
1933         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1934         resp->attrset &= args->createattrs.attrmask;
1935         nfs4_ntov_table_free(&ntov, &sarg);
1936 
1937         error = makefh4(&cs->fh, vp, cs->exi);
1938         if (error) {
1939                 *cs->statusp = resp->status = puterrno4(error);
1940         }
1941 
1942         /*
1943          * The cinfo.atomic = TRUE only if we got no errors, we have
1944          * non-zero va_seq's, and it has incremented by exactly one
1945          * during the creation and it didn't change during the VOP_LOOKUP
1946          * or VOP_FSYNC.
1947          */
1948         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1949             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1950                 resp->cinfo.atomic = TRUE;
1951         else
1952                 resp->cinfo.atomic = FALSE;
1953 
1954         /*
1955          * Force modified metadata out to stable storage.
1956          *
1957          * if a underlying vp exists, pass it to VOP_FSYNC
1958          */
1959         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1960                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1961         else
1962                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1963 
1964         if (resp->status != NFS4_OK) {
1965                 VN_RELE(vp);
1966                 goto out;
1967         }
1968         if (cs->vp)
1969                 VN_RELE(cs->vp);
1970 
1971         cs->vp = vp;
1972         *cs->statusp = resp->status = NFS4_OK;
1973 out:
1974         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1975             CREATE4res *, resp);
1976 }
1977 
1978 /*ARGSUSED*/
1979 static void
1980 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1981     struct compound_state *cs)
1982 {
1983         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1984             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1985 
1986         rfs4_op_inval(argop, resop, req, cs);
1987 
1988         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1989             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1990 }
1991 
1992 /*ARGSUSED*/
1993 static void
1994 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1995     struct compound_state *cs)
1996 {
1997         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1998         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1999         rfs4_deleg_state_t *dsp;
2000         nfsstat4 status;
2001 
2002         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2003             DELEGRETURN4args *, args);
2004 
2005         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2006         resp->status = *cs->statusp = status;
2007         if (status != NFS4_OK)
2008                 goto out;
2009 
2010         /* Ensure specified filehandle matches */
2011         if (cs->vp != dsp->rds_finfo->rf_vp) {
2012                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2013         } else
2014                 rfs4_return_deleg(dsp, FALSE);
2015 
2016         rfs4_update_lease(dsp->rds_client);
2017 
2018         rfs4_deleg_state_rele(dsp);
2019 out:
2020         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2021             DELEGRETURN4res *, resp);
2022 }
2023 
2024 /*
2025  * Check to see if a given "flavor" is an explicitly shared flavor.
2026  * The assumption of this routine is the "flavor" is already a valid
2027  * flavor in the secinfo list of "exi".
2028  *
2029  *      e.g.
2030  *              # share -o sec=flavor1 /export
2031  *              # share -o sec=flavor2 /export/home
2032  *
2033  *              flavor2 is not an explicitly shared flavor for /export,
2034  *              however it is in the secinfo list for /export thru the
2035  *              server namespace setup.
2036  */
2037 int
2038 is_exported_sec(int flavor, struct exportinfo *exi)
2039 {
2040         int     i;
2041         struct secinfo *sp;
2042 
2043         sp = exi->exi_export.ex_secinfo;
2044         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2045                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2046                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2047                         return (SEC_REF_EXPORTED(&sp[i]));
2048                 }
2049         }
2050 
2051         /* Should not reach this point based on the assumption */
2052         return (0);
2053 }
2054 
2055 /*
2056  * Check if the security flavor used in the request matches what is
2057  * required at the export point or at the root pseudo node (exi_root).
2058  *
2059  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2060  *
2061  */
2062 static int
2063 secinfo_match_or_authnone(struct compound_state *cs)
2064 {
2065         int     i;
2066         struct secinfo *sp;
2067 
2068         /*
2069          * Check cs->nfsflavor (from the request) against
2070          * the current export data in cs->exi.
2071          */
2072         sp = cs->exi->exi_export.ex_secinfo;
2073         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2074                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2075                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2076                         return (1);
2077         }
2078 
2079         return (0);
2080 }
2081 
2082 /*
2083  * Check the access authority for the client and return the correct error.
2084  */
2085 nfsstat4
2086 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2087 {
2088         int     authres;
2089 
2090         /*
2091          * First, check if the security flavor used in the request
2092          * are among the flavors set in the server namespace.
2093          */
2094         if (!secinfo_match_or_authnone(cs)) {
2095                 *cs->statusp = NFS4ERR_WRONGSEC;
2096                 return (*cs->statusp);
2097         }
2098 
2099         authres = checkauth4(cs, req);
2100 
2101         if (authres > 0) {
2102                 *cs->statusp = NFS4_OK;
2103                 if (! (cs->access & CS_ACCESS_LIMITED))
2104                         cs->access = CS_ACCESS_OK;
2105         } else if (authres == 0) {
2106                 *cs->statusp = NFS4ERR_ACCESS;
2107         } else if (authres == -2) {
2108                 *cs->statusp = NFS4ERR_WRONGSEC;
2109         } else {
2110                 *cs->statusp = NFS4ERR_DELAY;
2111         }
2112         return (*cs->statusp);
2113 }
2114 
2115 /*
2116  * bitmap4_to_attrmask is called by getattr and readdir.
2117  * It sets up the vattr mask and determines whether vfsstat call is needed
2118  * based on the input bitmap.
2119  * Returns nfsv4 status.
2120  */
2121 static nfsstat4
2122 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2123 {
2124         int i;
2125         uint_t  va_mask;
2126         struct statvfs64 *sbp = sargp->sbp;
2127 
2128         sargp->sbp = NULL;
2129         sargp->flag = 0;
2130         sargp->rdattr_error = NFS4_OK;
2131         sargp->mntdfid_set = FALSE;
2132         if (sargp->cs->vp)
2133                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2134                     FH4_ATTRDIR | FH4_NAMEDATTR);
2135         else
2136                 sargp->xattr = 0;
2137 
2138         /*
2139          * Set rdattr_error_req to true if return error per
2140          * failed entry rather than fail the readdir.
2141          */
2142         if (breq & FATTR4_RDATTR_ERROR_MASK)
2143                 sargp->rdattr_error_req = 1;
2144         else
2145                 sargp->rdattr_error_req = 0;
2146 
2147         /*
2148          * generate the va_mask
2149          * Handle the easy cases first
2150          */
2151         switch (breq) {
2152         case NFS4_NTOV_ATTR_MASK:
2153                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2154                 return (NFS4_OK);
2155 
2156         case NFS4_FS_ATTR_MASK:
2157                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2158                 sargp->sbp = sbp;
2159                 return (NFS4_OK);
2160 
2161         case NFS4_NTOV_ATTR_CACHE_MASK:
2162                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2163                 return (NFS4_OK);
2164 
2165         case FATTR4_LEASE_TIME_MASK:
2166                 sargp->vap->va_mask = 0;
2167                 return (NFS4_OK);
2168 
2169         default:
2170                 va_mask = 0;
2171                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2172                         if ((breq & nfs4_ntov_map[i].fbit) &&
2173                             nfs4_ntov_map[i].vbit)
2174                                 va_mask |= nfs4_ntov_map[i].vbit;
2175                 }
2176 
2177                 /*
2178                  * Check is vfsstat is needed
2179                  */
2180                 if (breq & NFS4_FS_ATTR_MASK)
2181                         sargp->sbp = sbp;
2182 
2183                 sargp->vap->va_mask = va_mask;
2184                 return (NFS4_OK);
2185         }
2186         /* NOTREACHED */
2187 }
2188 
2189 /*
2190  * bitmap4_get_sysattrs is called by getattr and readdir.
2191  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2192  * Returns nfsv4 status.
2193  */
2194 static nfsstat4
2195 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2196 {
2197         int error;
2198         struct compound_state *cs = sargp->cs;
2199         vnode_t *vp = cs->vp;
2200 
2201         if (sargp->sbp != NULL) {
2202                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2203                         sargp->sbp = NULL;   /* to identify error */
2204                         return (puterrno4(error));
2205                 }
2206         }
2207 
2208         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2209 }
2210 
2211 static void
2212 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2213 {
2214         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2215             KM_SLEEP);
2216         ntovp->attrcnt = 0;
2217         ntovp->vfsstat = FALSE;
2218 }
2219 
2220 static void
2221 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2222     struct nfs4_svgetit_arg *sargp)
2223 {
2224         int i;
2225         union nfs4_attr_u *na;
2226         uint8_t *amap;
2227 
2228         /*
2229          * XXX Should do the same checks for whether the bit is set
2230          */
2231         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2232             i < ntovp->attrcnt; i++, na++, amap++) {
2233                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2234                     NFS4ATTR_FREEIT, sargp, na);
2235         }
2236         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2237                 /*
2238                  * xdr_free for getattr will be done later
2239                  */
2240                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2241                     i < ntovp->attrcnt; i++, na++, amap++) {
2242                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2243                 }
2244         }
2245         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2246 }
2247 
2248 /*
2249  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2250  */
2251 static nfsstat4
2252 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2253     struct nfs4_svgetit_arg *sargp)
2254 {
2255         int error = 0;
2256         int i, k;
2257         struct nfs4_ntov_table ntov;
2258         XDR xdr;
2259         ulong_t xdr_size;
2260         char *xdr_attrs;
2261         nfsstat4 status = NFS4_OK;
2262         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2263         union nfs4_attr_u *na;
2264         uint8_t *amap;
2265 
2266         sargp->op = NFS4ATTR_GETIT;
2267         sargp->flag = 0;
2268 
2269         fattrp->attrmask = 0;
2270         /* if no bits requested, then return empty fattr4 */
2271         if (breq == 0) {
2272                 fattrp->attrlist4_len = 0;
2273                 fattrp->attrlist4 = NULL;
2274                 return (NFS4_OK);
2275         }
2276 
2277         /*
2278          * return NFS4ERR_INVAL when client requests write-only attrs
2279          */
2280         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2281                 return (NFS4ERR_INVAL);
2282 
2283         nfs4_ntov_table_init(&ntov);
2284         na = ntov.na;
2285         amap = ntov.amap;
2286 
2287         /*
2288          * Now loop to get or verify the attrs
2289          */
2290         for (i = 0; i < nfs4_ntov_map_size; i++) {
2291                 if (breq & nfs4_ntov_map[i].fbit) {
2292                         if ((*nfs4_ntov_map[i].sv_getit)(
2293                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2294 
2295                                 error = (*nfs4_ntov_map[i].sv_getit)(
2296                                     NFS4ATTR_GETIT, sargp, na);
2297 
2298                                 /*
2299                                  * Possible error values:
2300                                  * >0 if sv_getit failed to
2301                                  * get the attr; 0 if succeeded;
2302                                  * <0 if rdattr_error and the
2303                                  * attribute cannot be returned.
2304                                  */
2305                                 if (error && !(sargp->rdattr_error_req))
2306                                         goto done;
2307                                 /*
2308                                  * If error then just for entry
2309                                  */
2310                                 if (error == 0) {
2311                                         fattrp->attrmask |=
2312                                             nfs4_ntov_map[i].fbit;
2313                                         *amap++ =
2314                                             (uint8_t)nfs4_ntov_map[i].nval;
2315                                         na++;
2316                                         (ntov.attrcnt)++;
2317                                 } else if ((error > 0) &&
2318                                     (sargp->rdattr_error == NFS4_OK)) {
2319                                         sargp->rdattr_error = puterrno4(error);
2320                                 }
2321                                 error = 0;
2322                         }
2323                 }
2324         }
2325 
2326         /*
2327          * If rdattr_error was set after the return value for it was assigned,
2328          * update it.
2329          */
2330         if (prev_rdattr_error != sargp->rdattr_error) {
2331                 na = ntov.na;
2332                 amap = ntov.amap;
2333                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2334                         k = *amap;
2335                         if (k < FATTR4_RDATTR_ERROR) {
2336                                 continue;
2337                         }
2338                         if ((k == FATTR4_RDATTR_ERROR) &&
2339                             ((*nfs4_ntov_map[k].sv_getit)(
2340                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2341 
2342                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2343                                     NFS4ATTR_GETIT, sargp, na);
2344                         }
2345                         break;
2346                 }
2347         }
2348 
2349         xdr_size = 0;
2350         na = ntov.na;
2351         amap = ntov.amap;
2352         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2353                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2354         }
2355 
2356         fattrp->attrlist4_len = xdr_size;
2357         if (xdr_size) {
2358                 /* freed by rfs4_op_getattr_free() */
2359                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2360 
2361                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2362 
2363                 na = ntov.na;
2364                 amap = ntov.amap;
2365                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2366                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2367                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2368                                     int, *amap);
2369                                 status = NFS4ERR_SERVERFAULT;
2370                                 break;
2371                         }
2372                 }
2373                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2374         } else {
2375                 fattrp->attrlist4 = NULL;
2376         }
2377 done:
2378 
2379         nfs4_ntov_table_free(&ntov, sargp);
2380 
2381         if (error != 0)
2382                 status = puterrno4(error);
2383 
2384         return (status);
2385 }
2386 
2387 /* ARGSUSED */
2388 static void
2389 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2390     struct compound_state *cs)
2391 {
2392         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2393         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2394         struct nfs4_svgetit_arg sarg;
2395         struct statvfs64 sb;
2396         nfsstat4 status;
2397 
2398         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2399             GETATTR4args *, args);
2400 
2401         if (cs->vp == NULL) {
2402                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2403                 goto out;
2404         }
2405 
2406         if (cs->access == CS_ACCESS_DENIED) {
2407                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2408                 goto out;
2409         }
2410 
2411         sarg.sbp = &sb;
2412         sarg.cs = cs;
2413         sarg.is_referral = B_FALSE;
2414 
2415         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2416         if (status == NFS4_OK) {
2417 
2418                 status = bitmap4_get_sysattrs(&sarg);
2419                 if (status == NFS4_OK) {
2420 
2421                         /* Is this a referral? */
2422                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2423                                 /* Older V4 Solaris client sees a link */
2424                                 if (client_is_downrev(req))
2425                                         sarg.vap->va_type = VLNK;
2426                                 else
2427                                         sarg.is_referral = B_TRUE;
2428                         }
2429 
2430                         status = do_rfs4_op_getattr(args->attr_request,
2431                             &resp->obj_attributes, &sarg);
2432                 }
2433         }
2434         *cs->statusp = resp->status = status;
2435 out:
2436         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2437             GETATTR4res *, resp);
2438 }
2439 
2440 static void
2441 rfs4_op_getattr_free(nfs_resop4 *resop)
2442 {
2443         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2444 
2445         nfs4_fattr4_free(&resp->obj_attributes);
2446 }
2447 
2448 /* ARGSUSED */
2449 static void
2450 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2451     struct compound_state *cs)
2452 {
2453         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2454 
2455         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2456 
2457         if (cs->vp == NULL) {
2458                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2459                 goto out;
2460         }
2461         if (cs->access == CS_ACCESS_DENIED) {
2462                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2463                 goto out;
2464         }
2465 
2466         /* check for reparse point at the share point */
2467         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2468                 /* it's all bad */
2469                 cs->exi->exi_moved = 1;
2470                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2471                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2472                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2473                 return;
2474         }
2475 
2476         /* check for reparse point at vp */
2477         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2478                 /* it's not all bad */
2479                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2480                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2481                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2482                 return;
2483         }
2484 
2485         resp->object.nfs_fh4_val =
2486             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2487         nfs_fh4_copy(&cs->fh, &resp->object);
2488         *cs->statusp = resp->status = NFS4_OK;
2489 out:
2490         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2491             GETFH4res *, resp);
2492 }
2493 
2494 static void
2495 rfs4_op_getfh_free(nfs_resop4 *resop)
2496 {
2497         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2498 
2499         if (resp->status == NFS4_OK &&
2500             resp->object.nfs_fh4_val != NULL) {
2501                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2502                 resp->object.nfs_fh4_val = NULL;
2503                 resp->object.nfs_fh4_len = 0;
2504         }
2505 }
2506 
2507 /*
2508  * illegal: args: void
2509  *          res : status (NFS4ERR_OP_ILLEGAL)
2510  */
2511 /* ARGSUSED */
2512 static void
2513 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2514     struct svc_req *req, struct compound_state *cs)
2515 {
2516         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2517 
2518         resop->resop = OP_ILLEGAL;
2519         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2520 }
2521 
2522 /*
2523  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2524  *       res: status. If success - CURRENT_FH unchanged, return change_info
2525  */
2526 /* ARGSUSED */
2527 static void
2528 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2529     struct compound_state *cs)
2530 {
2531         LINK4args *args = &argop->nfs_argop4_u.oplink;
2532         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2533         int error;
2534         vnode_t *vp;
2535         vnode_t *dvp;
2536         struct vattr bdva, idva, adva;
2537         char *nm;
2538         uint_t  len;
2539         struct sockaddr *ca;
2540         char *name = NULL;
2541         nfsstat4 status;
2542 
2543         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2544             LINK4args *, args);
2545 
2546         /* SAVED_FH: source object */
2547         vp = cs->saved_vp;
2548         if (vp == NULL) {
2549                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2550                 goto out;
2551         }
2552 
2553         /* CURRENT_FH: target directory */
2554         dvp = cs->vp;
2555         if (dvp == NULL) {
2556                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2557                 goto out;
2558         }
2559 
2560         /*
2561          * If there is a non-shared filesystem mounted on this vnode,
2562          * do not allow to link any file in this directory.
2563          */
2564         if (vn_ismntpt(dvp)) {
2565                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2566                 goto out;
2567         }
2568 
2569         if (cs->access == CS_ACCESS_DENIED) {
2570                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2571                 goto out;
2572         }
2573 
2574         /* Check source object's type validity */
2575         if (vp->v_type == VDIR) {
2576                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2577                 goto out;
2578         }
2579 
2580         /* Check target directory's type */
2581         if (dvp->v_type != VDIR) {
2582                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2583                 goto out;
2584         }
2585 
2586         if (cs->saved_exi != cs->exi) {
2587                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2588                 goto out;
2589         }
2590 
2591         status = utf8_dir_verify(&args->newname);
2592         if (status != NFS4_OK) {
2593                 *cs->statusp = resp->status = status;
2594                 goto out;
2595         }
2596 
2597         nm = utf8_to_fn(&args->newname, &len, NULL);
2598         if (nm == NULL) {
2599                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2600                 goto out;
2601         }
2602 
2603         if (len > MAXNAMELEN) {
2604                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2605                 kmem_free(nm, len);
2606                 goto out;
2607         }
2608 
2609         if (rdonly4(req, cs)) {
2610                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2611                 kmem_free(nm, len);
2612                 goto out;
2613         }
2614 
2615         /* Get "before" change value */
2616         bdva.va_mask = AT_CTIME|AT_SEQ;
2617         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2618         if (error) {
2619                 *cs->statusp = resp->status = puterrno4(error);
2620                 kmem_free(nm, len);
2621                 goto out;
2622         }
2623 
2624         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2625         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2626             MAXPATHLEN  + 1);
2627 
2628         if (name == NULL) {
2629                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2630                 kmem_free(nm, len);
2631                 goto out;
2632         }
2633 
2634         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2635 
2636         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2637 
2638         if (nm != name)
2639                 kmem_free(name, MAXPATHLEN + 1);
2640         kmem_free(nm, len);
2641 
2642         /*
2643          * Get the initial "after" sequence number, if it fails, set to zero
2644          */
2645         idva.va_mask = AT_SEQ;
2646         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2647                 idva.va_seq = 0;
2648 
2649         /*
2650          * Force modified data and metadata out to stable storage.
2651          */
2652         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2653         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2654 
2655         if (error) {
2656                 *cs->statusp = resp->status = puterrno4(error);
2657                 goto out;
2658         }
2659 
2660         /*
2661          * Get "after" change value, if it fails, simply return the
2662          * before value.
2663          */
2664         adva.va_mask = AT_CTIME|AT_SEQ;
2665         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2666                 adva.va_ctime = bdva.va_ctime;
2667                 adva.va_seq = 0;
2668         }
2669 
2670         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2671 
2672         /*
2673          * The cinfo.atomic = TRUE only if we have
2674          * non-zero va_seq's, and it has incremented by exactly one
2675          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2676          */
2677         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2678             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2679                 resp->cinfo.atomic = TRUE;
2680         else
2681                 resp->cinfo.atomic = FALSE;
2682 
2683         *cs->statusp = resp->status = NFS4_OK;
2684 out:
2685         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2686             LINK4res *, resp);
2687 }
2688 
2689 /*
2690  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2691  */
2692 
2693 /* ARGSUSED */
2694 static nfsstat4
2695 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2696 {
2697         int error;
2698         int different_export = 0;
2699         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2700         struct exportinfo *exi = NULL, *pre_exi = NULL;
2701         nfsstat4 stat;
2702         fid_t fid;
2703         int attrdir, dotdot, walk;
2704         bool_t is_newvp = FALSE;
2705 
2706         if (cs->vp->v_flag & V_XATTRDIR) {
2707                 attrdir = 1;
2708                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2709         } else {
2710                 attrdir = 0;
2711                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2712         }
2713 
2714         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2715 
2716         /*
2717          * If dotdotting, then need to check whether it's
2718          * above the root of a filesystem, or above an
2719          * export point.
2720          */
2721         if (dotdot) {
2722                 ASSERT(cs->exi != NULL);
2723                 ASSERT3U(cs->exi->exi_zoneid, ==, curzone->zone_id);
2724                 /*
2725                  * If dotdotting at the root of a filesystem, then
2726                  * need to traverse back to the mounted-on filesystem
2727                  * and do the dotdot lookup there.
2728                  */
2729                 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2730 
2731                         /*
2732                          * If at the system root, then can
2733                          * go up no further.
2734                          */
2735                         if (VN_IS_CURZONEROOT(cs->vp))
2736                                 return (puterrno4(ENOENT));
2737 
2738                         /*
2739                          * Traverse back to the mounted-on filesystem
2740                          */
2741                         cs->vp = untraverse(cs->exi->exi_ne, cs->vp);
2742 
2743                         /*
2744                          * Set the different_export flag so we remember
2745                          * to pick up a new exportinfo entry for
2746                          * this new filesystem.
2747                          */
2748                         different_export = 1;
2749                 } else {
2750 
2751                         /*
2752                          * If dotdotting above an export point then set
2753                          * the different_export to get new export info.
2754                          */
2755                         different_export = nfs_exported(cs->exi, cs->vp);
2756                 }
2757         }
2758 
2759         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2760             NULL, NULL, NULL);
2761         if (error)
2762                 return (puterrno4(error));
2763 
2764         /*
2765          * If the vnode is in a pseudo filesystem, check whether it is visible.
2766          *
2767          * XXX if the vnode is a symlink and it is not visible in
2768          * a pseudo filesystem, return ENOENT (not following symlink).
2769          * V4 client can not mount such symlink. This is a regression
2770          * from V2/V3.
2771          *
2772          * In the same exported filesystem, if the security flavor used
2773          * is not an explicitly shared flavor, limit the view to the visible
2774          * list entries only. This is not a WRONGSEC case because it's already
2775          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2776          */
2777         if (!different_export &&
2778             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2779             cs->access & CS_ACCESS_LIMITED)) {
2780                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2781                         VN_RELE(vp);
2782                         return (puterrno4(ENOENT));
2783                 }
2784         }
2785 
2786         /*
2787          * If it's a mountpoint, then traverse it.
2788          */
2789         if (vn_ismntpt(vp)) {
2790                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2791                 pre_tvp = vp;           /* save pre-traversed vnode     */
2792 
2793                 /*
2794                  * hold pre_tvp to counteract rele by traverse.  We will
2795                  * need pre_tvp below if checkexport4 fails
2796                  */
2797                 VN_HOLD(pre_tvp);
2798                 if ((error = traverse(&vp)) != 0) {
2799                         VN_RELE(vp);
2800                         VN_RELE(pre_tvp);
2801                         return (puterrno4(error));
2802                 }
2803                 different_export = 1;
2804         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2805                 /*
2806                  * The vfsp comparison is to handle the case where
2807                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2808                  * and NFS is unaware of local fs transistions because
2809                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2810                  * the dir and the obj returned by lookup will have different
2811                  * vfs ptrs.
2812                  */
2813                 different_export = 1;
2814         }
2815 
2816         if (different_export) {
2817 
2818                 bzero(&fid, sizeof (fid));
2819                 fid.fid_len = MAXFIDSZ;
2820                 error = vop_fid_pseudo(vp, &fid);
2821                 if (error) {
2822                         VN_RELE(vp);
2823                         if (pre_tvp)
2824                                 VN_RELE(pre_tvp);
2825                         return (puterrno4(error));
2826                 }
2827 
2828                 if (dotdot)
2829                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2830                 else
2831                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2832 
2833                 if (exi == NULL) {
2834                         if (pre_tvp) {
2835                                 /*
2836                                  * If this vnode is a mounted-on vnode,
2837                                  * but the mounted-on file system is not
2838                                  * exported, send back the filehandle for
2839                                  * the mounted-on vnode, not the root of
2840                                  * the mounted-on file system.
2841                                  */
2842                                 VN_RELE(vp);
2843                                 vp = pre_tvp;
2844                                 exi = pre_exi;
2845                         } else {
2846                                 VN_RELE(vp);
2847                                 return (puterrno4(EACCES));
2848                         }
2849                 } else if (pre_tvp) {
2850                         /* we're done with pre_tvp now. release extra hold */
2851                         VN_RELE(pre_tvp);
2852                 }
2853 
2854                 cs->exi = exi;
2855 
2856                 /*
2857                  * Now we do a checkauth4. The reason is that
2858                  * this client/user may not have access to the new
2859                  * exported file system, and if they do,
2860                  * the client/user may be mapped to a different uid.
2861                  *
2862                  * We start with a new cr, because the checkauth4 done
2863                  * in the PUT*FH operation over wrote the cred's uid,
2864                  * gid, etc, and we want the real thing before calling
2865                  * checkauth4()
2866                  */
2867                 crfree(cs->cr);
2868                 cs->cr = crdup(cs->basecr);
2869 
2870                 oldvp = cs->vp;
2871                 cs->vp = vp;
2872                 is_newvp = TRUE;
2873 
2874                 stat = call_checkauth4(cs, req);
2875                 if (stat != NFS4_OK) {
2876                         VN_RELE(cs->vp);
2877                         cs->vp = oldvp;
2878                         return (stat);
2879                 }
2880         }
2881 
2882         /*
2883          * After various NFS checks, do a label check on the path
2884          * component. The label on this path should either be the
2885          * global zone's label or a zone's label. We are only
2886          * interested in the zone's label because exported files
2887          * in global zone is accessible (though read-only) to
2888          * clients. The exportability/visibility check is already
2889          * done before reaching this code.
2890          */
2891         if (is_system_labeled()) {
2892                 bslabel_t *clabel;
2893 
2894                 ASSERT(req->rq_label != NULL);
2895                 clabel = req->rq_label;
2896                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2897                     "got client label from request(1)", struct svc_req *, req);
2898 
2899                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2900                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2901                             cs->exi)) {
2902                                 error = EACCES;
2903                                 goto err_out;
2904                         }
2905                 } else {
2906                         /*
2907                          * We grant access to admin_low label clients
2908                          * only if the client is trusted, i.e. also
2909                          * running Solaris Trusted Extension.
2910                          */
2911                         struct sockaddr *ca;
2912                         int             addr_type;
2913                         void            *ipaddr;
2914                         tsol_tpc_t      *tp;
2915 
2916                         ca = (struct sockaddr *)svc_getrpccaller(
2917                             req->rq_xprt)->buf;
2918                         if (ca->sa_family == AF_INET) {
2919                                 addr_type = IPV4_VERSION;
2920                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2921                         } else if (ca->sa_family == AF_INET6) {
2922                                 addr_type = IPV6_VERSION;
2923                                 ipaddr = &((struct sockaddr_in6 *)
2924                                     ca)->sin6_addr;
2925                         }
2926                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2927                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2928                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2929                             SUN_CIPSO) {
2930                                 if (tp != NULL)
2931                                         TPC_RELE(tp);
2932                                 error = EACCES;
2933                                 goto err_out;
2934                         }
2935                         TPC_RELE(tp);
2936                 }
2937         }
2938 
2939         error = makefh4(&cs->fh, vp, cs->exi);
2940 
2941 err_out:
2942         if (error) {
2943                 if (is_newvp) {
2944                         VN_RELE(cs->vp);
2945                         cs->vp = oldvp;
2946                 } else
2947                         VN_RELE(vp);
2948                 return (puterrno4(error));
2949         }
2950 
2951         if (!is_newvp) {
2952                 if (cs->vp)
2953                         VN_RELE(cs->vp);
2954                 cs->vp = vp;
2955         } else if (oldvp)
2956                 VN_RELE(oldvp);
2957 
2958         /*
2959          * if did lookup on attrdir and didn't lookup .., set named
2960          * attr fh flag
2961          */
2962         if (attrdir && ! dotdot)
2963                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2964 
2965         /* Assume false for now, open proc will set this */
2966         cs->mandlock = FALSE;
2967 
2968         return (NFS4_OK);
2969 }
2970 
2971 /* ARGSUSED */
2972 static void
2973 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2974     struct compound_state *cs)
2975 {
2976         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2977         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2978         char *nm;
2979         uint_t len;
2980         struct sockaddr *ca;
2981         char *name = NULL;
2982         nfsstat4 status;
2983 
2984         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2985             LOOKUP4args *, args);
2986 
2987         if (cs->vp == NULL) {
2988                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2989                 goto out;
2990         }
2991 
2992         if (cs->vp->v_type == VLNK) {
2993                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2994                 goto out;
2995         }
2996 
2997         if (cs->vp->v_type != VDIR) {
2998                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2999                 goto out;
3000         }
3001 
3002         status = utf8_dir_verify(&args->objname);
3003         if (status != NFS4_OK) {
3004                 *cs->statusp = resp->status = status;
3005                 goto out;
3006         }
3007 
3008         nm = utf8_to_str(&args->objname, &len, NULL);
3009         if (nm == NULL) {
3010                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3011                 goto out;
3012         }
3013 
3014         if (len > MAXNAMELEN) {
3015                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3016                 kmem_free(nm, len);
3017                 goto out;
3018         }
3019 
3020         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3021         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3022             MAXPATHLEN  + 1);
3023 
3024         if (name == NULL) {
3025                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3026                 kmem_free(nm, len);
3027                 goto out;
3028         }
3029 
3030         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3031 
3032         if (name != nm)
3033                 kmem_free(name, MAXPATHLEN + 1);
3034         kmem_free(nm, len);
3035 
3036 out:
3037         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3038             LOOKUP4res *, resp);
3039 }
3040 
3041 /* ARGSUSED */
3042 static void
3043 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3044     struct compound_state *cs)
3045 {
3046         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3047 
3048         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3049 
3050         if (cs->vp == NULL) {
3051                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3052                 goto out;
3053         }
3054 
3055         if (cs->vp->v_type != VDIR) {
3056                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3057                 goto out;
3058         }
3059 
3060         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3061 
3062         /*
3063          * From NFSV4 Specification, LOOKUPP should not check for
3064          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3065          */
3066         if (resp->status == NFS4ERR_WRONGSEC) {
3067                 *cs->statusp = resp->status = NFS4_OK;
3068         }
3069 
3070 out:
3071         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3072             LOOKUPP4res *, resp);
3073 }
3074 
3075 
3076 /*ARGSUSED2*/
3077 static void
3078 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3079     struct compound_state *cs)
3080 {
3081         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3082         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3083         vnode_t         *avp = NULL;
3084         int             lookup_flags = LOOKUP_XATTR, error;
3085         int             exp_ro = 0;
3086 
3087         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3088             OPENATTR4args *, args);
3089 
3090         if (cs->vp == NULL) {
3091                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3092                 goto out;
3093         }
3094 
3095         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3096             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3097                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3098                 goto out;
3099         }
3100 
3101         /*
3102          * If file system supports passing ACE mask to VOP_ACCESS then
3103          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3104          */
3105 
3106         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3107                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3108                     V_ACE_MASK, cs->cr, NULL);
3109         else
3110                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3111                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3112                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3113 
3114         if (error) {
3115                 *cs->statusp = resp->status = puterrno4(EACCES);
3116                 goto out;
3117         }
3118 
3119         /*
3120          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3121          * the file system is exported read-only -- regardless of
3122          * createdir flag.  Otherwise the attrdir would be created
3123          * (assuming server fs isn't mounted readonly locally).  If
3124          * VOP_LOOKUP returns ENOENT in this case, the error will
3125          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3126          * because specfs has no VOP_LOOKUP op, so the macro would
3127          * return ENOSYS.  EINVAL is returned by all (current)
3128          * Solaris file system implementations when any of their
3129          * restrictions are violated (xattr(dir) can't have xattrdir).
3130          * Returning NOTSUPP is more appropriate in this case
3131          * because the object will never be able to have an attrdir.
3132          */
3133         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3134                 lookup_flags |= CREATE_XATTR_DIR;
3135 
3136         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3137             NULL, NULL, NULL);
3138 
3139         if (error) {
3140                 if (error == ENOENT && args->createdir && exp_ro)
3141                         *cs->statusp = resp->status = puterrno4(EROFS);
3142                 else if (error == EINVAL || error == ENOSYS)
3143                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3144                 else
3145                         *cs->statusp = resp->status = puterrno4(error);
3146                 goto out;
3147         }
3148 
3149         ASSERT(avp->v_flag & V_XATTRDIR);
3150 
3151         error = makefh4(&cs->fh, avp, cs->exi);
3152 
3153         if (error) {
3154                 VN_RELE(avp);
3155                 *cs->statusp = resp->status = puterrno4(error);
3156                 goto out;
3157         }
3158 
3159         VN_RELE(cs->vp);
3160         cs->vp = avp;
3161 
3162         /*
3163          * There is no requirement for an attrdir fh flag
3164          * because the attrdir has a vnode flag to distinguish
3165          * it from regular (non-xattr) directories.  The
3166          * FH4_ATTRDIR flag is set for future sanity checks.
3167          */
3168         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3169         *cs->statusp = resp->status = NFS4_OK;
3170 
3171 out:
3172         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3173             OPENATTR4res *, resp);
3174 }
3175 
3176 static int
3177 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3178     caller_context_t *ct)
3179 {
3180         int error;
3181         int i;
3182         clock_t delaytime;
3183 
3184         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3185 
3186         /*
3187          * Don't block on mandatory locks. If this routine returns
3188          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3189          */
3190         uio->uio_fmode = FNONBLOCK;
3191 
3192         for (i = 0; i < rfs4_maxlock_tries; i++) {
3193 
3194 
3195                 if (direction == FREAD) {
3196                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3197                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3198                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3199                 } else {
3200                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3201                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3202                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3203                 }
3204 
3205                 if (error != EAGAIN)
3206                         break;
3207 
3208                 if (i < rfs4_maxlock_tries - 1) {
3209                         delay(delaytime);
3210                         delaytime *= 2;
3211                 }
3212         }
3213 
3214         return (error);
3215 }
3216 
3217 /* ARGSUSED */
3218 static void
3219 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3220     struct compound_state *cs)
3221 {
3222         READ4args *args = &argop->nfs_argop4_u.opread;
3223         READ4res *resp = &resop->nfs_resop4_u.opread;
3224         int error;
3225         int verror;
3226         vnode_t *vp;
3227         struct vattr va;
3228         struct iovec iov, *iovp = NULL;
3229         int iovcnt;
3230         struct uio uio;
3231         u_offset_t offset;
3232         bool_t *deleg = &cs->deleg;
3233         nfsstat4 stat;
3234         int in_crit = 0;
3235         mblk_t *mp = NULL;
3236         int alloc_err = 0;
3237         int rdma_used = 0;
3238         int loaned_buffers;
3239         caller_context_t ct;
3240         struct uio *uiop;
3241 
3242         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3243             READ4args, args);
3244 
3245         vp = cs->vp;
3246         if (vp == NULL) {
3247                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3248                 goto out;
3249         }
3250         if (cs->access == CS_ACCESS_DENIED) {
3251                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3252                 goto out;
3253         }
3254 
3255         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3256             deleg, TRUE, &ct)) != NFS4_OK) {
3257                 *cs->statusp = resp->status = stat;
3258                 goto out;
3259         }
3260 
3261         /*
3262          * Enter the critical region before calling VOP_RWLOCK
3263          * to avoid a deadlock with write requests.
3264          */
3265         if (nbl_need_check(vp)) {
3266                 nbl_start_crit(vp, RW_READER);
3267                 in_crit = 1;
3268                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3269                     &ct)) {
3270                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3271                         goto out;
3272                 }
3273         }
3274 
3275         if (args->wlist) {
3276                 if (args->count > clist_len(args->wlist)) {
3277                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3278                         goto out;
3279                 }
3280                 rdma_used = 1;
3281         }
3282 
3283         /* use loaned buffers for TCP */
3284         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3285 
3286         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3287         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3288 
3289         /*
3290          * If we can't get the attributes, then we can't do the
3291          * right access checking.  So, we'll fail the request.
3292          */
3293         if (verror) {
3294                 *cs->statusp = resp->status = puterrno4(verror);
3295                 goto out;
3296         }
3297 
3298         if (vp->v_type != VREG) {
3299                 *cs->statusp = resp->status =
3300                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3301                 goto out;
3302         }
3303 
3304         if (crgetuid(cs->cr) != va.va_uid &&
3305             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3306             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3307                 *cs->statusp = resp->status = puterrno4(error);
3308                 goto out;
3309         }
3310 
3311         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3312                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3313                 goto out;
3314         }
3315 
3316         offset = args->offset;
3317         if (offset >= va.va_size) {
3318                 *cs->statusp = resp->status = NFS4_OK;
3319                 resp->eof = TRUE;
3320                 resp->data_len = 0;
3321                 resp->data_val = NULL;
3322                 resp->mblk = NULL;
3323                 /* RDMA */
3324                 resp->wlist = args->wlist;
3325                 resp->wlist_len = resp->data_len;
3326                 *cs->statusp = resp->status = NFS4_OK;
3327                 if (resp->wlist)
3328                         clist_zero_len(resp->wlist);
3329                 goto out;
3330         }
3331 
3332         if (args->count == 0) {
3333                 *cs->statusp = resp->status = NFS4_OK;
3334                 resp->eof = FALSE;
3335                 resp->data_len = 0;
3336                 resp->data_val = NULL;
3337                 resp->mblk = NULL;
3338                 /* RDMA */
3339                 resp->wlist = args->wlist;
3340                 resp->wlist_len = resp->data_len;
3341                 if (resp->wlist)
3342                         clist_zero_len(resp->wlist);
3343                 goto out;
3344         }
3345 
3346         /*
3347          * Do not allocate memory more than maximum allowed
3348          * transfer size
3349          */
3350         if (args->count > rfs4_tsize(req))
3351                 args->count = rfs4_tsize(req);
3352 
3353         if (loaned_buffers) {
3354                 uiop = (uio_t *)rfs_setup_xuio(vp);
3355                 ASSERT(uiop != NULL);
3356                 uiop->uio_segflg = UIO_SYSSPACE;
3357                 uiop->uio_loffset = args->offset;
3358                 uiop->uio_resid = args->count;
3359 
3360                 /* Jump to do the read if successful */
3361                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3362                         /*
3363                          * Need to hold the vnode until after VOP_RETZCBUF()
3364                          * is called.
3365                          */
3366                         VN_HOLD(vp);
3367                         goto doio_read;
3368                 }
3369 
3370                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3371                     uiop->uio_loffset, int, uiop->uio_resid);
3372 
3373                 uiop->uio_extflg = 0;
3374 
3375                 /* failure to setup for zero copy */
3376                 rfs_free_xuio((void *)uiop);
3377                 loaned_buffers = 0;
3378         }
3379 
3380         /*
3381          * If returning data via RDMA Write, then grab the chunk list. If we
3382          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3383          */
3384         if (rdma_used) {
3385                 mp = NULL;
3386                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3387                 uio.uio_iov = &iov;
3388                 uio.uio_iovcnt = 1;
3389         } else {
3390                 /*
3391                  * mp will contain the data to be sent out in the read reply.
3392                  * It will be freed after the reply has been sent.
3393                  */
3394                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3395                 ASSERT(mp != NULL);
3396                 ASSERT(alloc_err == 0);
3397                 uio.uio_iov = iovp;
3398                 uio.uio_iovcnt = iovcnt;
3399         }
3400 
3401         uio.uio_segflg = UIO_SYSSPACE;
3402         uio.uio_extflg = UIO_COPY_CACHED;
3403         uio.uio_loffset = args->offset;
3404         uio.uio_resid = args->count;
3405         uiop = &uio;
3406 
3407 doio_read:
3408         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3409 
3410         va.va_mask = AT_SIZE;
3411         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3412 
3413         if (error) {
3414                 if (mp)
3415                         freemsg(mp);
3416                 *cs->statusp = resp->status = puterrno4(error);
3417                 goto out;
3418         }
3419 
3420         /* make mblk using zc buffers */
3421         if (loaned_buffers) {
3422                 mp = uio_to_mblk(uiop);
3423                 ASSERT(mp != NULL);
3424         }
3425 
3426         *cs->statusp = resp->status = NFS4_OK;
3427 
3428         ASSERT(uiop->uio_resid >= 0);
3429         resp->data_len = args->count - uiop->uio_resid;
3430         if (mp) {
3431                 resp->data_val = (char *)mp->b_datap->db_base;
3432                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3433         } else {
3434                 resp->data_val = (caddr_t)iov.iov_base;
3435         }
3436 
3437         resp->mblk = mp;
3438 
3439         if (!verror && offset + resp->data_len == va.va_size)
3440                 resp->eof = TRUE;
3441         else
3442                 resp->eof = FALSE;
3443 
3444         if (rdma_used) {
3445                 if (!rdma_setup_read_data4(args, resp)) {
3446                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3447                 }
3448         } else {
3449                 resp->wlist = NULL;
3450         }
3451 
3452 out:
3453         if (in_crit)
3454                 nbl_end_crit(vp);
3455 
3456         if (iovp != NULL)
3457                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3458 
3459         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3460             READ4res *, resp);
3461 }
3462 
3463 static void
3464 rfs4_op_read_free(nfs_resop4 *resop)
3465 {
3466         READ4res        *resp = &resop->nfs_resop4_u.opread;
3467 
3468         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3469                 freemsg(resp->mblk);
3470                 resp->mblk = NULL;
3471                 resp->data_val = NULL;
3472                 resp->data_len = 0;
3473         }
3474 }
3475 
3476 static void
3477 rfs4_op_readdir_free(nfs_resop4 * resop)
3478 {
3479         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3480 
3481         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3482                 freeb(resp->mblk);
3483                 resp->mblk = NULL;
3484                 resp->data_len = 0;
3485         }
3486 }
3487 
3488 
3489 /* ARGSUSED */
3490 static void
3491 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3492     struct compound_state *cs)
3493 {
3494         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3495         int             error;
3496         vnode_t         *vp;
3497         struct exportinfo *exi, *sav_exi;
3498         nfs_fh4_fmt_t   *fh_fmtp;
3499         nfs_export_t *ne = nfs_get_export();
3500 
3501         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3502 
3503         if (cs->vp) {
3504                 VN_RELE(cs->vp);
3505                 cs->vp = NULL;
3506         }
3507 
3508         if (cs->cr)
3509                 crfree(cs->cr);
3510 
3511         cs->cr = crdup(cs->basecr);
3512 
3513         vp = ne->exi_public->exi_vp;
3514         if (vp == NULL) {
3515                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3516                 goto out;
3517         }
3518 
3519         error = makefh4(&cs->fh, vp, ne->exi_public);
3520         if (error != 0) {
3521                 *cs->statusp = resp->status = puterrno4(error);
3522                 goto out;
3523         }
3524         sav_exi = cs->exi;
3525         if (ne->exi_public == ne->exi_root) {
3526                 /*
3527                  * No filesystem is actually shared public, so we default
3528                  * to exi_root. In this case, we must check whether root
3529                  * is exported.
3530                  */
3531                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3532 
3533                 /*
3534                  * if root filesystem is exported, the exportinfo struct that we
3535                  * should use is what checkexport4 returns, because root_exi is
3536                  * actually a mostly empty struct.
3537                  */
3538                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3539                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3540                 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3541         } else {
3542                 /*
3543                  * it's a properly shared filesystem
3544                  */
3545                 cs->exi = ne->exi_public;
3546         }
3547 
3548         if (is_system_labeled()) {
3549                 bslabel_t *clabel;
3550 
3551                 ASSERT(req->rq_label != NULL);
3552                 clabel = req->rq_label;
3553                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3554                     "got client label from request(1)",
3555                     struct svc_req *, req);
3556                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3557                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3558                             cs->exi)) {
3559                                 *cs->statusp = resp->status =
3560                                     NFS4ERR_SERVERFAULT;
3561                                 goto out;
3562                         }
3563                 }
3564         }
3565 
3566         VN_HOLD(vp);
3567         cs->vp = vp;
3568 
3569         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3570                 VN_RELE(cs->vp);
3571                 cs->vp = NULL;
3572                 cs->exi = sav_exi;
3573                 goto out;
3574         }
3575 
3576         *cs->statusp = resp->status = NFS4_OK;
3577 out:
3578         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3579             PUTPUBFH4res *, resp);
3580 }
3581 
3582 /*
3583  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3584  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3585  * or joe have restrictive search permissions, then we shouldn't let
3586  * the client get a file handle. This is easy to enforce. However, we
3587  * don't know what security flavor should be used until we resolve the
3588  * path name. Another complication is uid mapping. If root is
3589  * the user, then it will be mapped to the anonymous user by default,
3590  * but we won't know that till we've resolved the path name. And we won't
3591  * know what the anonymous user is.
3592  * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor of
3594  * the target object matches that of the request, and if root was the
3595  * caller, check for the root= and anon= options, and if necessary,
3596  * repeat the lookup using the right cred_t. But that's not done yet.
3597  */
3598 /* ARGSUSED */
3599 static void
3600 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3601     struct compound_state *cs)
3602 {
3603         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3604         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3605         nfs_fh4_fmt_t *fh_fmtp;
3606 
3607         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3608             PUTFH4args *, args);
3609 
3610         if (cs->vp) {
3611                 VN_RELE(cs->vp);
3612                 cs->vp = NULL;
3613         }
3614 
3615         if (cs->cr) {
3616                 crfree(cs->cr);
3617                 cs->cr = NULL;
3618         }
3619 
3620 
3621         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3622                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3623                 goto out;
3624         }
3625 
3626         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3627         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3628             NULL);
3629 
3630         if (cs->exi == NULL) {
3631                 *cs->statusp = resp->status = NFS4ERR_STALE;
3632                 goto out;
3633         }
3634 
3635         cs->cr = crdup(cs->basecr);
3636 
3637         ASSERT(cs->cr != NULL);
3638 
3639         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3640                 *cs->statusp = resp->status;
3641                 goto out;
3642         }
3643 
3644         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3645                 VN_RELE(cs->vp);
3646                 cs->vp = NULL;
3647                 goto out;
3648         }
3649 
3650         nfs_fh4_copy(&args->object, &cs->fh);
3651         *cs->statusp = resp->status = NFS4_OK;
3652         cs->deleg = FALSE;
3653 
3654 out:
3655         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3656             PUTFH4res *, resp);
3657 }
3658 
3659 /* ARGSUSED */
3660 static void
3661 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3662     struct compound_state *cs)
3663 {
3664         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3665         int error;
3666         fid_t fid;
3667         struct exportinfo *exi, *sav_exi;
3668 
3669         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3670 
3671         if (cs->vp) {
3672                 VN_RELE(cs->vp);
3673                 cs->vp = NULL;
3674         }
3675 
3676         if (cs->cr)
3677                 crfree(cs->cr);
3678 
3679         cs->cr = crdup(cs->basecr);
3680 
3681         /*
3682          * Using rootdir, the system root vnode,
3683          * get its fid.
3684          */
3685         bzero(&fid, sizeof (fid));
3686         fid.fid_len = MAXFIDSZ;
3687         error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3688         if (error != 0) {
3689                 *cs->statusp = resp->status = puterrno4(error);
3690                 goto out;
3691         }
3692 
3693         /*
3694          * Then use the root fsid & fid it to find out if it's exported
3695          *
3696          * If the server root isn't exported directly, then
3697          * it should at least be a pseudo export based on
3698          * one or more exports further down in the server's
3699          * file tree.
3700          */
3701         exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3702         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3703                 NFS4_DEBUG(rfs4_debug,
3704                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3705                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3706                 goto out;
3707         }
3708 
3709         /*
3710          * Now make a filehandle based on the root
3711          * export and root vnode.
3712          */
3713         error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3714         if (error != 0) {
3715                 *cs->statusp = resp->status = puterrno4(error);
3716                 goto out;
3717         }
3718 
3719         sav_exi = cs->exi;
3720         cs->exi = exi;
3721 
3722         VN_HOLD(ZONE_ROOTVP());
3723         cs->vp = ZONE_ROOTVP();
3724 
3725         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3726                 VN_RELE(cs->vp);
3727                 cs->vp = NULL;
3728                 cs->exi = sav_exi;
3729                 goto out;
3730         }
3731 
3732         *cs->statusp = resp->status = NFS4_OK;
3733         cs->deleg = FALSE;
3734 out:
3735         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3736             PUTROOTFH4res *, resp);
3737 }
3738 
3739 /*
3740  * readlink: args: CURRENT_FH.
3741  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3742  */
3743 
3744 /* ARGSUSED */
3745 static void
3746 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3747     struct compound_state *cs)
3748 {
3749         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3750         int error;
3751         vnode_t *vp;
3752         struct iovec iov;
3753         struct vattr va;
3754         struct uio uio;
3755         char *data;
3756         struct sockaddr *ca;
3757         char *name = NULL;
3758         int is_referral;
3759 
3760         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3761 
3762         /* CURRENT_FH: directory */
3763         vp = cs->vp;
3764         if (vp == NULL) {
3765                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3766                 goto out;
3767         }
3768 
3769         if (cs->access == CS_ACCESS_DENIED) {
3770                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3771                 goto out;
3772         }
3773 
3774         /* Is it a referral? */
3775         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3776 
3777                 is_referral = 1;
3778 
3779         } else {
3780 
3781                 is_referral = 0;
3782 
3783                 if (vp->v_type == VDIR) {
3784                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3785                         goto out;
3786                 }
3787 
3788                 if (vp->v_type != VLNK) {
3789                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3790                         goto out;
3791                 }
3792 
3793         }
3794 
3795         va.va_mask = AT_MODE;
3796         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3797         if (error) {
3798                 *cs->statusp = resp->status = puterrno4(error);
3799                 goto out;
3800         }
3801 
3802         if (MANDLOCK(vp, va.va_mode)) {
3803                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3804                 goto out;
3805         }
3806 
3807         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3808 
3809         if (is_referral) {
3810                 char *s;
3811                 size_t strsz;
3812 
3813                 /* Get an artificial symlink based on a referral */
3814                 s = build_symlink(vp, cs->cr, &strsz);
3815                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3816                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3817                     vnode_t *, vp, char *, s);
3818                 if (s == NULL)
3819                         error = EINVAL;
3820                 else {
3821                         error = 0;
3822                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3823                         kmem_free(s, strsz);
3824                 }
3825 
3826         } else {
3827 
3828                 iov.iov_base = data;
3829                 iov.iov_len = MAXPATHLEN;
3830                 uio.uio_iov = &iov;
3831                 uio.uio_iovcnt = 1;
3832                 uio.uio_segflg = UIO_SYSSPACE;
3833                 uio.uio_extflg = UIO_COPY_CACHED;
3834                 uio.uio_loffset = 0;
3835                 uio.uio_resid = MAXPATHLEN;
3836 
3837                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3838 
3839                 if (!error)
3840                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3841         }
3842 
3843         if (error) {
3844                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3845                 *cs->statusp = resp->status = puterrno4(error);
3846                 goto out;
3847         }
3848 
3849         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3850         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3851             MAXPATHLEN  + 1);
3852 
3853         if (name == NULL) {
3854                 /*
3855                  * Even though the conversion failed, we return
3856                  * something. We just don't translate it.
3857                  */
3858                 name = data;
3859         }
3860 
3861         /*
3862          * treat link name as data
3863          */
3864         (void) str_to_utf8(name, (utf8string *)&resp->link);
3865 
3866         if (name != data)
3867                 kmem_free(name, MAXPATHLEN + 1);
3868         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3869         *cs->statusp = resp->status = NFS4_OK;
3870 
3871 out:
3872         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3873             READLINK4res *, resp);
3874 }
3875 
3876 static void
3877 rfs4_op_readlink_free(nfs_resop4 *resop)
3878 {
3879         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3880         utf8string *symlink = (utf8string *)&resp->link;
3881 
3882         if (symlink->utf8string_val) {
3883                 UTF8STRING_FREE(*symlink)
3884         }
3885 }
3886 
3887 /*
3888  * release_lockowner:
3889  *      Release any state associated with the supplied
3890  *      lockowner. Note if any lo_state is holding locks we will not
3891  *      rele that lo_state and thus the lockowner will not be destroyed.
3892  *      A client using lock after the lock owner stateid has been released
3893  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3894  *      to reissue the lock with new_lock_owner set to TRUE.
3895  *      args: lock_owner
3896  *      res:  status
3897  */
3898 /* ARGSUSED */
3899 static void
3900 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3901     struct svc_req *req, struct compound_state *cs)
3902 {
3903         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3904         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3905         rfs4_lockowner_t *lo;
3906         rfs4_openowner_t *oo;
3907         rfs4_state_t *sp;
3908         rfs4_lo_state_t *lsp;
3909         rfs4_client_t *cp;
3910         bool_t create = FALSE;
3911         locklist_t *llist;
3912         sysid_t sysid;
3913 
3914         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3915             cs, RELEASE_LOCKOWNER4args *, ap);
3916 
3917         /* Make sure there is a clientid around for this request */
3918         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3919 
3920         if (cp == NULL) {
3921                 *cs->statusp = resp->status =
3922                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3923                 goto out;
3924         }
3925         rfs4_client_rele(cp);
3926 
3927         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3928         if (lo == NULL) {
3929                 *cs->statusp = resp->status = NFS4_OK;
3930                 goto out;
3931         }
3932         ASSERT(lo->rl_client != NULL);
3933 
3934         /*
3935          * Check for EXPIRED client. If so will reap state with in a lease
3936          * period or on next set_clientid_confirm step
3937          */
3938         if (rfs4_lease_expired(lo->rl_client)) {
3939                 rfs4_lockowner_rele(lo);
3940                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3941                 goto out;
3942         }
3943 
3944         /*
3945          * If no sysid has been assigned, then no locks exist; just return.
3946          */
3947         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3948         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3949                 rfs4_lockowner_rele(lo);
3950                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3951                 goto out;
3952         }
3953 
3954         sysid = lo->rl_client->rc_sysidt;
3955         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3956 
3957         /*
3958          * Mark the lockowner invalid.
3959          */
3960         rfs4_dbe_hide(lo->rl_dbe);
3961 
3962         /*
3963          * sysid-pid pair should now not be used since the lockowner is
3964          * invalid. If the client were to instantiate the lockowner again
3965          * it would be assigned a new pid. Thus we can get the list of
3966          * current locks.
3967          */
3968 
3969         llist = flk_get_active_locks(sysid, lo->rl_pid);
3970         /* If we are still holding locks fail */
3971         if (llist != NULL) {
3972 
3973                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3974 
3975                 flk_free_locklist(llist);
3976                 /*
3977                  * We need to unhide the lockowner so the client can
3978                  * try it again. The bad thing here is if the client
3979                  * has a logic error that took it here in the first place
3980                  * they probably have lost accounting of the locks that it
3981                  * is holding. So we may have dangling state until the
3982                  * open owner state is reaped via close. One scenario
3983                  * that could possibly occur is that the client has
3984                  * sent the unlock request(s) in separate threads
3985                  * and has not waited for the replies before sending the
3986                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3987                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3988                  * reissuing the request.
3989                  */
3990                 rfs4_dbe_unhide(lo->rl_dbe);
3991                 rfs4_lockowner_rele(lo);
3992                 goto out;
3993         }
3994 
3995         /*
3996          * For the corresponding client we need to check each open
3997          * owner for any opens that have lockowner state associated
3998          * with this lockowner.
3999          */
4000 
4001         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4002         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4003             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4004 
4005                 rfs4_dbe_lock(oo->ro_dbe);
4006                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4007                     sp = list_next(&oo->ro_statelist, sp)) {
4008 
4009                         rfs4_dbe_lock(sp->rs_dbe);
4010                         for (lsp = list_head(&sp->rs_lostatelist);
4011                             lsp != NULL;
4012                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4013                                 if (lsp->rls_locker == lo) {
4014                                         rfs4_dbe_lock(lsp->rls_dbe);
4015                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4016                                         rfs4_dbe_unlock(lsp->rls_dbe);
4017                                 }
4018                         }
4019                         rfs4_dbe_unlock(sp->rs_dbe);
4020                 }
4021                 rfs4_dbe_unlock(oo->ro_dbe);
4022         }
4023         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4024 
4025         rfs4_lockowner_rele(lo);
4026 
4027         *cs->statusp = resp->status = NFS4_OK;
4028 
4029 out:
4030         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4031             cs, RELEASE_LOCKOWNER4res *, resp);
4032 }
4033 
4034 /*
4035  * short utility function to lookup a file and recall the delegation
4036  */
4037 static rfs4_file_t *
4038 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4039     int *lkup_error, cred_t *cr)
4040 {
4041         vnode_t *vp;
4042         rfs4_file_t *fp = NULL;
4043         bool_t fcreate = FALSE;
4044         int error;
4045 
4046         if (vpp)
4047                 *vpp = NULL;
4048 
4049         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4050             NULL)) == 0) {
4051                 if (vp->v_type == VREG)
4052                         fp = rfs4_findfile(vp, NULL, &fcreate);
4053                 if (vpp)
4054                         *vpp = vp;
4055                 else
4056                         VN_RELE(vp);
4057         }
4058 
4059         if (lkup_error)
4060                 *lkup_error = error;
4061 
4062         return (fp);
4063 }
4064 
4065 /*
4066  * remove: args: CURRENT_FH: directory; name.
4067  *      res: status. If success - CURRENT_FH unchanged, return change_info
4068  *              for directory.
4069  */
4070 /* ARGSUSED */
4071 static void
4072 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4073     struct compound_state *cs)
4074 {
4075         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4076         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4077         int error;
4078         vnode_t *dvp, *vp;
4079         struct vattr bdva, idva, adva;
4080         char *nm;
4081         uint_t len;
4082         rfs4_file_t *fp;
4083         int in_crit = 0;
4084         bslabel_t *clabel;
4085         struct sockaddr *ca;
4086         char *name = NULL;
4087         nfsstat4 status;
4088 
4089         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4090             REMOVE4args *, args);
4091 
4092         /* CURRENT_FH: directory */
4093         dvp = cs->vp;
4094         if (dvp == NULL) {
4095                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4096                 goto out;
4097         }
4098 
4099         if (cs->access == CS_ACCESS_DENIED) {
4100                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4101                 goto out;
4102         }
4103 
4104         /*
4105          * If there is an unshared filesystem mounted on this vnode,
4106          * Do not allow to remove anything in this directory.
4107          */
4108         if (vn_ismntpt(dvp)) {
4109                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4110                 goto out;
4111         }
4112 
4113         if (dvp->v_type != VDIR) {
4114                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4115                 goto out;
4116         }
4117 
4118         status = utf8_dir_verify(&args->target);
4119         if (status != NFS4_OK) {
4120                 *cs->statusp = resp->status = status;
4121                 goto out;
4122         }
4123 
4124         /*
4125          * Lookup the file so that we can check if it's a directory
4126          */
4127         nm = utf8_to_fn(&args->target, &len, NULL);
4128         if (nm == NULL) {
4129                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4130                 goto out;
4131         }
4132 
4133         if (len > MAXNAMELEN) {
4134                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4135                 kmem_free(nm, len);
4136                 goto out;
4137         }
4138 
4139         if (rdonly4(req, cs)) {
4140                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4141                 kmem_free(nm, len);
4142                 goto out;
4143         }
4144 
4145         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4146         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4147             MAXPATHLEN  + 1);
4148 
4149         if (name == NULL) {
4150                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4151                 kmem_free(nm, len);
4152                 goto out;
4153         }
4154 
4155         /*
4156          * Lookup the file to determine type and while we are see if
4157          * there is a file struct around and check for delegation.
4158          * We don't need to acquire va_seq before this lookup, if
4159          * it causes an update, cinfo.before will not match, which will
4160          * trigger a cache flush even if atomic is TRUE.
4161          */
4162         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4163                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4164                     NULL)) {
4165                         VN_RELE(vp);
4166                         rfs4_file_rele(fp);
4167                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4168                         if (nm != name)
4169                                 kmem_free(name, MAXPATHLEN + 1);
4170                         kmem_free(nm, len);
4171                         goto out;
4172                 }
4173         }
4174 
4175         /* Didn't find anything to remove */
4176         if (vp == NULL) {
4177                 *cs->statusp = resp->status = error;
4178                 if (nm != name)
4179                         kmem_free(name, MAXPATHLEN + 1);
4180                 kmem_free(nm, len);
4181                 goto out;
4182         }
4183 
4184         if (nbl_need_check(vp)) {
4185                 nbl_start_crit(vp, RW_READER);
4186                 in_crit = 1;
4187                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4188                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4189                         if (nm != name)
4190                                 kmem_free(name, MAXPATHLEN + 1);
4191                         kmem_free(nm, len);
4192                         nbl_end_crit(vp);
4193                         VN_RELE(vp);
4194                         if (fp) {
4195                                 rfs4_clear_dont_grant(fp);
4196                                 rfs4_file_rele(fp);
4197                         }
4198                         goto out;
4199                 }
4200         }
4201 
4202         /* check label before allowing removal */
4203         if (is_system_labeled()) {
4204                 ASSERT(req->rq_label != NULL);
4205                 clabel = req->rq_label;
4206                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4207                     "got client label from request(1)",
4208                     struct svc_req *, req);
4209                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4210                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4211                             cs->exi)) {
4212                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4213                                 if (name != nm)
4214                                         kmem_free(name, MAXPATHLEN + 1);
4215                                 kmem_free(nm, len);
4216                                 if (in_crit)
4217                                         nbl_end_crit(vp);
4218                                 VN_RELE(vp);
4219                                 if (fp) {
4220                                         rfs4_clear_dont_grant(fp);
4221                                         rfs4_file_rele(fp);
4222                                 }
4223                                 goto out;
4224                         }
4225                 }
4226         }
4227 
4228         /* Get dir "before" change value */
4229         bdva.va_mask = AT_CTIME|AT_SEQ;
4230         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4231         if (error) {
4232                 *cs->statusp = resp->status = puterrno4(error);
4233                 if (nm != name)
4234                         kmem_free(name, MAXPATHLEN + 1);
4235                 kmem_free(nm, len);
4236                 if (in_crit)
4237                         nbl_end_crit(vp);
4238                 VN_RELE(vp);
4239                 if (fp) {
4240                         rfs4_clear_dont_grant(fp);
4241                         rfs4_file_rele(fp);
4242                 }
4243                 goto out;
4244         }
4245         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4246 
4247         /* Actually do the REMOVE operation */
4248         if (vp->v_type == VDIR) {
4249                 /*
4250                  * Can't remove a directory that has a mounted-on filesystem.
4251                  */
4252                 if (vn_ismntpt(vp)) {
4253                         error = EACCES;
4254                 } else {
4255                         /*
4256                          * System V defines rmdir to return EEXIST,
4257                          * not ENOTEMPTY, if the directory is not
4258                          * empty.  A System V NFS server needs to map
4259                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4260                          * transmit over the wire.
4261                          */
4262                         if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4263                             NULL, 0)) == EEXIST)
4264                                 error = ENOTEMPTY;
4265                 }
4266         } else {
4267                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4268                     fp != NULL) {
4269                         struct vattr va;
4270                         vnode_t *tvp;
4271 
4272                         rfs4_dbe_lock(fp->rf_dbe);
4273                         tvp = fp->rf_vp;
4274                         if (tvp)
4275                                 VN_HOLD(tvp);
4276                         rfs4_dbe_unlock(fp->rf_dbe);
4277 
4278                         if (tvp) {
4279                                 /*
4280                                  * This is va_seq safe because we are not
4281                                  * manipulating dvp.
4282                                  */
4283                                 va.va_mask = AT_NLINK;
4284                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4285                                     va.va_nlink == 0) {
4286                                         /* Remove state on file remove */
4287                                         if (in_crit) {
4288                                                 nbl_end_crit(vp);
4289                                                 in_crit = 0;
4290                                         }
4291                                         rfs4_close_all_state(fp);
4292                                 }
4293                                 VN_RELE(tvp);
4294                         }
4295                 }
4296         }
4297 
4298         if (in_crit)
4299                 nbl_end_crit(vp);
4300         VN_RELE(vp);
4301 
4302         if (fp) {
4303                 rfs4_clear_dont_grant(fp);
4304                 rfs4_file_rele(fp);
4305         }
4306         if (nm != name)
4307                 kmem_free(name, MAXPATHLEN + 1);
4308         kmem_free(nm, len);
4309 
4310         if (error) {
4311                 *cs->statusp = resp->status = puterrno4(error);
4312                 goto out;
4313         }
4314 
4315         /*
4316          * Get the initial "after" sequence number, if it fails, set to zero
4317          */
4318         idva.va_mask = AT_SEQ;
4319         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4320                 idva.va_seq = 0;
4321 
4322         /*
4323          * Force modified data and metadata out to stable storage.
4324          */
4325         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4326 
4327         /*
4328          * Get "after" change value, if it fails, simply return the
4329          * before value.
4330          */
4331         adva.va_mask = AT_CTIME|AT_SEQ;
4332         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4333                 adva.va_ctime = bdva.va_ctime;
4334                 adva.va_seq = 0;
4335         }
4336 
4337         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4338 
4339         /*
4340          * The cinfo.atomic = TRUE only if we have
4341          * non-zero va_seq's, and it has incremented by exactly one
4342          * during the VOP_REMOVE/RMDIR and it didn't change during
4343          * the VOP_FSYNC.
4344          */
4345         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4346             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4347                 resp->cinfo.atomic = TRUE;
4348         else
4349                 resp->cinfo.atomic = FALSE;
4350 
4351         *cs->statusp = resp->status = NFS4_OK;
4352 
4353 out:
4354         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4355             REMOVE4res *, resp);
4356 }
4357 
4358 /*
4359  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4360  *              oldname and newname.
4361  *      res: status. If success - CURRENT_FH unchanged, return change_info
4362  *              for both from and target directories.
4363  */
4364 /* ARGSUSED */
4365 static void
4366 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4367     struct compound_state *cs)
4368 {
4369         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4370         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4371         int error;
4372         vnode_t *odvp;
4373         vnode_t *ndvp;
4374         vnode_t *srcvp, *targvp, *tvp;
4375         struct vattr obdva, oidva, oadva;
4376         struct vattr nbdva, nidva, nadva;
4377         char *onm, *nnm;
4378         uint_t olen, nlen;
4379         rfs4_file_t *fp, *sfp;
4380         int in_crit_src, in_crit_targ;
4381         int fp_rele_grant_hold, sfp_rele_grant_hold;
4382         int unlinked;
4383         bslabel_t *clabel;
4384         struct sockaddr *ca;
4385         char *converted_onm = NULL;
4386         char *converted_nnm = NULL;
4387         nfsstat4 status;
4388 
4389         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4390             RENAME4args *, args);
4391 
4392         fp = sfp = NULL;
4393         srcvp = targvp = tvp = NULL;
4394         in_crit_src = in_crit_targ = 0;
4395         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4396         unlinked = 0;
4397 
4398         /* CURRENT_FH: target directory */
4399         ndvp = cs->vp;
4400         if (ndvp == NULL) {
4401                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4402                 goto out;
4403         }
4404 
4405         /* SAVED_FH: from directory */
4406         odvp = cs->saved_vp;
4407         if (odvp == NULL) {
4408                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4409                 goto out;
4410         }
4411 
4412         if (cs->access == CS_ACCESS_DENIED) {
4413                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4414                 goto out;
4415         }
4416 
4417         /*
4418          * If there is an unshared filesystem mounted on this vnode,
4419          * do not allow to rename objects in this directory.
4420          */
4421         if (vn_ismntpt(odvp)) {
4422                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4423                 goto out;
4424         }
4425 
4426         /*
4427          * If there is an unshared filesystem mounted on this vnode,
4428          * do not allow to rename to this directory.
4429          */
4430         if (vn_ismntpt(ndvp)) {
4431                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4432                 goto out;
4433         }
4434 
4435         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4436                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4437                 goto out;
4438         }
4439 
4440         if (cs->saved_exi != cs->exi) {
4441                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4442                 goto out;
4443         }
4444 
4445         status = utf8_dir_verify(&args->oldname);
4446         if (status != NFS4_OK) {
4447                 *cs->statusp = resp->status = status;
4448                 goto out;
4449         }
4450 
4451         status = utf8_dir_verify(&args->newname);
4452         if (status != NFS4_OK) {
4453                 *cs->statusp = resp->status = status;
4454                 goto out;
4455         }
4456 
4457         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4458         if (onm == NULL) {
4459                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4460                 goto out;
4461         }
4462         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4463         nlen = MAXPATHLEN + 1;
4464         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4465             nlen);
4466 
4467         if (converted_onm == NULL) {
4468                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4469                 kmem_free(onm, olen);
4470                 goto out;
4471         }
4472 
4473         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4474         if (nnm == NULL) {
4475                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4476                 if (onm != converted_onm)
4477                         kmem_free(converted_onm, MAXPATHLEN + 1);
4478                 kmem_free(onm, olen);
4479                 goto out;
4480         }
4481         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4482             MAXPATHLEN  + 1);
4483 
4484         if (converted_nnm == NULL) {
4485                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4486                 kmem_free(nnm, nlen);
4487                 nnm = NULL;
4488                 if (onm != converted_onm)
4489                         kmem_free(converted_onm, MAXPATHLEN + 1);
4490                 kmem_free(onm, olen);
4491                 goto out;
4492         }
4493 
4494 
4495         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4496                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4497                 kmem_free(onm, olen);
4498                 kmem_free(nnm, nlen);
4499                 goto out;
4500         }
4501 
4502 
4503         if (rdonly4(req, cs)) {
4504                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4505                 if (onm != converted_onm)
4506                         kmem_free(converted_onm, MAXPATHLEN + 1);
4507                 kmem_free(onm, olen);
4508                 if (nnm != converted_nnm)
4509                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4510                 kmem_free(nnm, nlen);
4511                 goto out;
4512         }
4513 
4514         /* check label of the target dir */
4515         if (is_system_labeled()) {
4516                 ASSERT(req->rq_label != NULL);
4517                 clabel = req->rq_label;
4518                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4519                     "got client label from request(1)",
4520                     struct svc_req *, req);
4521                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4522                         if (!do_rfs_label_check(clabel, ndvp,
4523                             EQUALITY_CHECK, cs->exi)) {
4524                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4525                                 goto err_out;
4526                         }
4527                 }
4528         }
4529 
4530         /*
4531          * Is the source a file and have a delegation?
4532          * We don't need to acquire va_seq before these lookups, if
4533          * it causes an update, cinfo.before will not match, which will
4534          * trigger a cache flush even if atomic is TRUE.
4535          */
4536         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4537             &error, cs->cr)) {
4538                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4539                     NULL)) {
4540                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4541                         goto err_out;
4542                 }
4543         }
4544 
4545         if (srcvp == NULL) {
4546                 *cs->statusp = resp->status = puterrno4(error);
4547                 if (onm != converted_onm)
4548                         kmem_free(converted_onm, MAXPATHLEN + 1);
4549                 kmem_free(onm, olen);
4550                 if (nnm != converted_nnm)
4551                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4552                 kmem_free(nnm, nlen);
4553                 goto out;
4554         }
4555 
4556         sfp_rele_grant_hold = 1;
4557 
4558         /* Does the destination exist and a file and have a delegation? */
4559         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4560             NULL, cs->cr)) {
4561                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4562                     NULL)) {
4563                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4564                         goto err_out;
4565                 }
4566         }
4567         fp_rele_grant_hold = 1;
4568 
4569         /* Check for NBMAND lock on both source and target */
4570         if (nbl_need_check(srcvp)) {
4571                 nbl_start_crit(srcvp, RW_READER);
4572                 in_crit_src = 1;
4573                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4574                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4575                         goto err_out;
4576                 }
4577         }
4578 
4579         if (targvp && nbl_need_check(targvp)) {
4580                 nbl_start_crit(targvp, RW_READER);
4581                 in_crit_targ = 1;
4582                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4583                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4584                         goto err_out;
4585                 }
4586         }
4587 
4588         /* Get source "before" change value */
4589         obdva.va_mask = AT_CTIME|AT_SEQ;
4590         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4591         if (!error) {
4592                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4593                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4594         }
4595         if (error) {
4596                 *cs->statusp = resp->status = puterrno4(error);
4597                 goto err_out;
4598         }
4599 
4600         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4601         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4602 
4603         error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4604             NULL, 0);
4605 
4606         /*
4607          * If target existed and was unlinked by VOP_RENAME, state will need
4608          * closed. To avoid deadlock, rfs4_close_all_state will be done after
4609          * any necessary nbl_end_crit on srcvp and tgtvp.
4610          */
4611         if (error == 0 && fp != NULL) {
4612                 rfs4_dbe_lock(fp->rf_dbe);
4613                 tvp = fp->rf_vp;
4614                 if (tvp)
4615                         VN_HOLD(tvp);
4616                 rfs4_dbe_unlock(fp->rf_dbe);
4617 
4618                 if (tvp) {
4619                         struct vattr va;
4620                         va.va_mask = AT_NLINK;
4621 
4622                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4623                             va.va_nlink == 0) {
4624                                 unlinked = 1;
4625 
4626                                 /* DEBUG data */
4627                                 if ((srcvp == targvp) || (tvp != targvp)) {
4628                                         cmn_err(CE_WARN, "rfs4_op_rename: "
4629                                             "srcvp %p, targvp: %p, tvp: %p",
4630                                             (void *)srcvp, (void *)targvp,
4631                                             (void *)tvp);
4632                                 }
4633                         } else {
4634                                 VN_RELE(tvp);
4635                         }
4636                 }
4637         }
4638         if (error == 0)
4639                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4640 
4641         if (in_crit_src)
4642                 nbl_end_crit(srcvp);
4643         if (srcvp)
4644                 VN_RELE(srcvp);
4645         if (in_crit_targ)
4646                 nbl_end_crit(targvp);
4647         if (targvp)
4648                 VN_RELE(targvp);
4649 
4650         if (unlinked) {
4651                 ASSERT(fp != NULL);
4652                 ASSERT(tvp != NULL);
4653 
4654                 /* DEBUG data */
4655                 if (RW_READ_HELD(&tvp->v_nbllock)) {
4656                         cmn_err(CE_WARN, "rfs4_op_rename: "
4657                             "RW_READ_HELD(%p)", (void *)tvp);
4658                 }
4659 
4660                 /* The file is gone and so should the state */
4661                 rfs4_close_all_state(fp);
4662                 VN_RELE(tvp);
4663         }
4664 
4665         if (sfp) {
4666                 rfs4_clear_dont_grant(sfp);
4667                 rfs4_file_rele(sfp);
4668         }
4669         if (fp) {
4670                 rfs4_clear_dont_grant(fp);
4671                 rfs4_file_rele(fp);
4672         }
4673 
4674         if (converted_onm != onm)
4675                 kmem_free(converted_onm, MAXPATHLEN + 1);
4676         kmem_free(onm, olen);
4677         if (converted_nnm != nnm)
4678                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4679         kmem_free(nnm, nlen);
4680 
4681         /*
4682          * Get the initial "after" sequence number, if it fails, set to zero
4683          */
4684         oidva.va_mask = AT_SEQ;
4685         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4686                 oidva.va_seq = 0;
4687 
4688         nidva.va_mask = AT_SEQ;
4689         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4690                 nidva.va_seq = 0;
4691 
4692         /*
4693          * Force modified data and metadata out to stable storage.
4694          */
4695         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4696         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4697 
4698         if (error) {
4699                 *cs->statusp = resp->status = puterrno4(error);
4700                 goto out;
4701         }
4702 
4703         /*
4704          * Get "after" change values, if it fails, simply return the
4705          * before value.
4706          */
4707         oadva.va_mask = AT_CTIME|AT_SEQ;
4708         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4709                 oadva.va_ctime = obdva.va_ctime;
4710                 oadva.va_seq = 0;
4711         }
4712 
4713         nadva.va_mask = AT_CTIME|AT_SEQ;
4714         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4715                 nadva.va_ctime = nbdva.va_ctime;
4716                 nadva.va_seq = 0;
4717         }
4718 
4719         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4720         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4721 
4722         /*
4723          * The cinfo.atomic = TRUE only if we have
4724          * non-zero va_seq's, and it has incremented by exactly one
4725          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4726          */
4727         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4728             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4729                 resp->source_cinfo.atomic = TRUE;
4730         else
4731                 resp->source_cinfo.atomic = FALSE;
4732 
4733         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4734             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4735                 resp->target_cinfo.atomic = TRUE;
4736         else
4737                 resp->target_cinfo.atomic = FALSE;
4738 
4739 #ifdef  VOLATILE_FH_TEST
4740         {
4741         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4742 
4743         /*
4744          * Add the renamed file handle to the volatile rename list
4745          */
4746         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4747                 /* file handles may expire on rename */
4748                 vnode_t *vp;
4749 
4750                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4751                 /*
4752                  * Already know that nnm will be a valid string
4753                  */
4754                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4755                     NULL, NULL, NULL);
4756                 kmem_free(nnm, nlen);
4757                 if (!error) {
4758                         add_volrnm_fh(cs->exi, vp);
4759                         VN_RELE(vp);
4760                 }
4761         }
4762         }
4763 #endif  /* VOLATILE_FH_TEST */
4764 
4765         *cs->statusp = resp->status = NFS4_OK;
4766 out:
4767         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4768             RENAME4res *, resp);
4769         return;
4770 
4771 err_out:
4772         if (onm != converted_onm)
4773                 kmem_free(converted_onm, MAXPATHLEN + 1);
4774         if (onm != NULL)
4775                 kmem_free(onm, olen);
4776         if (nnm != converted_nnm)
4777                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4778         if (nnm != NULL)
4779                 kmem_free(nnm, nlen);
4780 
4781         if (in_crit_src) nbl_end_crit(srcvp);
4782         if (in_crit_targ) nbl_end_crit(targvp);
4783         if (targvp) VN_RELE(targvp);
4784         if (srcvp) VN_RELE(srcvp);
4785         if (sfp) {
4786                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4787                 rfs4_file_rele(sfp);
4788         }
4789         if (fp) {
4790                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4791                 rfs4_file_rele(fp);
4792         }
4793 
4794         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4795             RENAME4res *, resp);
4796 }
4797 
4798 /* ARGSUSED */
4799 static void
4800 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4801     struct compound_state *cs)
4802 {
4803         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4804         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4805         rfs4_client_t *cp;
4806 
4807         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4808             RENEW4args *, args);
4809 
4810         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4811                 *cs->statusp = resp->status =
4812                     rfs4_check_clientid(&args->clientid, 0);
4813                 goto out;
4814         }
4815 
4816         if (rfs4_lease_expired(cp)) {
4817                 rfs4_client_rele(cp);
4818                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4819                 goto out;
4820         }
4821 
4822         rfs4_update_lease(cp);
4823 
4824         mutex_enter(cp->rc_cbinfo.cb_lock);
4825         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4826                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4827                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4828         } else {
4829                 *cs->statusp = resp->status = NFS4_OK;
4830         }
4831         mutex_exit(cp->rc_cbinfo.cb_lock);
4832 
4833         rfs4_client_rele(cp);
4834 
4835 out:
4836         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4837             RENEW4res *, resp);
4838 }
4839 
4840 /* ARGSUSED */
4841 static void
4842 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4843     struct compound_state *cs)
4844 {
4845         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4846 
4847         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4848 
4849         /* No need to check cs->access - we are not accessing any object */
4850         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4851                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4852                 goto out;
4853         }
4854         if (cs->vp != NULL) {
4855                 VN_RELE(cs->vp);
4856         }
4857         cs->vp = cs->saved_vp;
4858         cs->saved_vp = NULL;
4859         cs->exi = cs->saved_exi;
4860         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4861         *cs->statusp = resp->status = NFS4_OK;
4862         cs->deleg = FALSE;
4863 
4864 out:
4865         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4866             RESTOREFH4res *, resp);
4867 }
4868 
4869 /* ARGSUSED */
4870 static void
4871 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4872     struct compound_state *cs)
4873 {
4874         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4875 
4876         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4877 
4878         /* No need to check cs->access - we are not accessing any object */
4879         if (cs->vp == NULL) {
4880                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4881                 goto out;
4882         }
4883         if (cs->saved_vp != NULL) {
4884                 VN_RELE(cs->saved_vp);
4885         }
4886         cs->saved_vp = cs->vp;
4887         VN_HOLD(cs->saved_vp);
4888         cs->saved_exi = cs->exi;
4889         /*
4890          * since SAVEFH is fairly rare, don't alloc space for its fh
4891          * unless necessary.
4892          */
4893         if (cs->saved_fh.nfs_fh4_val == NULL) {
4894                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4895         }
4896         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4897         *cs->statusp = resp->status = NFS4_OK;
4898 
4899 out:
4900         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4901             SAVEFH4res *, resp);
4902 }
4903 
4904 /*
4905  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4906  * return the bitmap of attrs that were set successfully. It is also
4907  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4908  * always be called only after rfs4_do_set_attrs().
4909  *
4910  * Verify that the attributes are same as the expected ones. sargp->vap
4911  * and sargp->sbp contain the input attributes as translated from fattr4.
4912  *
4913  * This function verifies only the attrs that correspond to a vattr or
4914  * vfsstat struct. That is because of the extra step needed to get the
4915  * corresponding system structs. Other attributes have already been set or
4916  * verified by do_rfs4_set_attrs.
4917  *
4918  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4919  */
4920 static int
4921 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4922     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4923 {
4924         int error, ret_error = 0;
4925         int i, k;
4926         uint_t sva_mask = sargp->vap->va_mask;
4927         uint_t vbit;
4928         union nfs4_attr_u *na;
4929         uint8_t *amap;
4930         bool_t getsb = ntovp->vfsstat;
4931 
4932         if (sva_mask != 0) {
4933                 /*
4934                  * Okay to overwrite sargp->vap because we verify based
4935                  * on the incoming values.
4936                  */
4937                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4938                     sargp->cs->cr, NULL);
4939                 if (ret_error) {
4940                         if (resp == NULL)
4941                                 return (ret_error);
4942                         /*
4943                          * Must return bitmap of successful attrs
4944                          */
4945                         sva_mask = 0;   /* to prevent checking vap later */
4946                 } else {
4947                         /*
4948                          * Some file systems clobber va_mask. it is probably
4949                          * wrong of them to do so, nonethless we practice
4950                          * defensive coding.
4951                          * See bug id 4276830.
4952                          */
4953                         sargp->vap->va_mask = sva_mask;
4954                 }
4955         }
4956 
4957         if (getsb) {
4958                 /*
4959                  * Now get the superblock and loop on the bitmap, as there is
4960                  * no simple way of translating from superblock to bitmap4.
4961                  */
4962                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4963                 if (ret_error) {
4964                         if (resp == NULL)
4965                                 goto errout;
4966                         getsb = FALSE;
4967                 }
4968         }
4969 
4970         /*
4971          * Now loop and verify each attribute which getattr returned
4972          * whether it's the same as the input.
4973          */
4974         if (resp == NULL && !getsb && (sva_mask == 0))
4975                 goto errout;
4976 
4977         na = ntovp->na;
4978         amap = ntovp->amap;
4979         k = 0;
4980         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4981                 k = *amap;
4982                 ASSERT(nfs4_ntov_map[k].nval == k);
4983                 vbit = nfs4_ntov_map[k].vbit;
4984 
4985                 /*
4986                  * If vattr attribute but VOP_GETATTR failed, or it's
4987                  * superblock attribute but VFS_STATVFS failed, skip
4988                  */
4989                 if (vbit) {
4990                         if ((vbit & sva_mask) == 0)
4991                                 continue;
4992                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4993                         continue;
4994                 }
4995                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4996                 if (resp != NULL) {
4997                         if (error)
4998                                 ret_error = -1; /* not all match */
4999                         else    /* update response bitmap */
5000                                 *resp |= nfs4_ntov_map[k].fbit;
5001                         continue;
5002                 }
5003                 if (error) {
5004                         ret_error = -1; /* not all match */
5005                         break;
5006                 }
5007         }
5008 errout:
5009         return (ret_error);
5010 }
5011 
5012 /*
5013  * Decode the attribute to be set/verified. If the attr requires a sys op
5014  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5015  * call the sv_getit function for it, because the sys op hasn't yet been done.
5016  * Return 0 for success, error code if failed.
5017  *
5018  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5019  */
5020 static int
5021 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5022     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5023 {
5024         int error = 0;
5025         bool_t set_later;
5026 
5027         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5028 
5029         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5030                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5031                 /*
5032                  * don't verify yet if a vattr or sb dependent attr,
5033                  * because we don't have their sys values yet.
5034                  * Will be done later.
5035                  */
5036                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5037                         /*
5038                          * ACLs are a special case, since setting the MODE
5039                          * conflicts with setting the ACL.  We delay setting
5040                          * the ACL until all other attributes have been set.
5041                          * The ACL gets set in do_rfs4_op_setattr().
5042                          */
5043                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5044                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5045                                     sargp, nap);
5046                                 if (error) {
5047                                         xdr_free(nfs4_ntov_map[k].xfunc,
5048                                             (caddr_t)nap);
5049                                 }
5050                         }
5051                 }
5052         } else {
5053 #ifdef  DEBUG
5054                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5055                     "decoding attribute %d\n", k);
5056 #endif
5057                 error = EINVAL;
5058         }
5059         if (!error && resp_bval && !set_later) {
5060                 *resp_bval |= nfs4_ntov_map[k].fbit;
5061         }
5062 
5063         return (error);
5064 }
5065 
5066 /*
5067  * Set vattr based on incoming fattr4 attrs - used by setattr.
5068  * Set response mask. Ignore any values that are not writable vattr attrs.
5069  */
5070 static nfsstat4
5071 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5072     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5073     nfs4_attr_cmd_t cmd)
5074 {
5075         int error = 0;
5076         int i;
5077         char *attrs = fattrp->attrlist4;
5078         uint32_t attrslen = fattrp->attrlist4_len;
5079         XDR xdr;
5080         nfsstat4 status = NFS4_OK;
5081         vnode_t *vp = cs->vp;
5082         union nfs4_attr_u *na;
5083         uint8_t *amap;
5084 
5085 #ifndef lint
5086         /*
5087          * Make sure that maximum attribute number can be expressed as an
5088          * 8 bit quantity.
5089          */
5090         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5091 #endif
5092 
5093         if (vp == NULL) {
5094                 if (resp)
5095                         *resp = 0;
5096                 return (NFS4ERR_NOFILEHANDLE);
5097         }
5098         if (cs->access == CS_ACCESS_DENIED) {
5099                 if (resp)
5100                         *resp = 0;
5101                 return (NFS4ERR_ACCESS);
5102         }
5103 
5104         sargp->op = cmd;
5105         sargp->cs = cs;
5106         sargp->flag = 0;     /* may be set later */
5107         sargp->vap->va_mask = 0;
5108         sargp->rdattr_error = NFS4_OK;
5109         sargp->rdattr_error_req = FALSE;
5110         /* sargp->sbp is set by the caller */
5111 
5112         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5113 
5114         na = ntovp->na;
5115         amap = ntovp->amap;
5116 
5117         /*
5118          * The following loop iterates on the nfs4_ntov_map checking
5119          * if the fbit is set in the requested bitmap.
5120          * If set then we process the arguments using the
5121          * rfs4_fattr4 conversion functions to populate the setattr
5122          * vattr and va_mask. Any settable attrs that are not using vattr
5123          * will be set in this loop.
5124          */
5125         for (i = 0; i < nfs4_ntov_map_size; i++) {
5126                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5127                         continue;
5128                 }
5129                 /*
5130                  * If setattr, must be a writable attr.
5131                  * If verify/nverify, must be a readable attr.
5132                  */
5133                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5134                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5135                         /*
5136                          * Client tries to set/verify an
5137                          * unsupported attribute, tries to set
5138                          * a read only attr or verify a write
5139                          * only one - error!
5140                          */
5141                         break;
5142                 }
5143                 /*
5144                  * Decode the attribute to set/verify
5145                  */
5146                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5147                     &xdr, resp ? resp : NULL, na);
5148                 if (error)
5149                         break;
5150                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5151                 na++;
5152                 (ntovp->attrcnt)++;
5153                 if (nfs4_ntov_map[i].vfsstat)
5154                         ntovp->vfsstat = TRUE;
5155         }
5156 
5157         if (error != 0)
5158                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5159                     puterrno4(error));
5160         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5161         return (status);
5162 }
5163 
5164 static nfsstat4
5165 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5166     stateid4 *stateid)
5167 {
5168         int error = 0;
5169         struct nfs4_svgetit_arg sarg;
5170         bool_t trunc;
5171 
5172         nfsstat4 status = NFS4_OK;
5173         cred_t *cr = cs->cr;
5174         vnode_t *vp = cs->vp;
5175         struct nfs4_ntov_table ntov;
5176         struct statvfs64 sb;
5177         struct vattr bva;
5178         struct flock64 bf;
5179         int in_crit = 0;
5180         uint_t saved_mask = 0;
5181         caller_context_t ct;
5182 
5183         *resp = 0;
5184         sarg.sbp = &sb;
5185         sarg.is_referral = B_FALSE;
5186         nfs4_ntov_table_init(&ntov);
5187         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5188             NFS4ATTR_SETIT);
5189         if (status != NFS4_OK) {
5190                 /*
5191                  * failed set attrs
5192                  */
5193                 goto done;
5194         }
5195         if ((sarg.vap->va_mask == 0) &&
5196             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5197                 /*
5198                  * no further work to be done
5199                  */
5200                 goto done;
5201         }
5202 
5203         /*
5204          * If we got a request to set the ACL and the MODE, only
5205          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5206          * to change any other bits, along with setting an ACL,
5207          * gives NFS4ERR_INVAL.
5208          */
5209         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5210             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5211                 vattr_t va;
5212 
5213                 va.va_mask = AT_MODE;
5214                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5215                 if (error) {
5216                         status = puterrno4(error);
5217                         goto done;
5218                 }
5219                 if ((sarg.vap->va_mode ^ va.va_mode) &
5220                     ~(VSUID | VSGID | VSVTX)) {
5221                         status = NFS4ERR_INVAL;
5222                         goto done;
5223                 }
5224         }
5225 
5226         /* Check stateid only if size has been set */
5227         if (sarg.vap->va_mask & AT_SIZE) {
5228                 trunc = (sarg.vap->va_size == 0);
5229                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5230                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5231                 if (status != NFS4_OK)
5232                         goto done;
5233         } else {
5234                 ct.cc_sysid = 0;
5235                 ct.cc_pid = 0;
5236                 ct.cc_caller_id = nfs4_srv_caller_id;
5237                 ct.cc_flags = CC_DONTBLOCK;
5238         }
5239 
5240         /* XXX start of possible race with delegations */
5241 
5242         /*
5243          * We need to specially handle size changes because it is
5244          * possible for the client to create a file with read-only
5245          * modes, but with the file opened for writing. If the client
5246          * then tries to set the file size, e.g. ftruncate(3C),
5247          * fcntl(F_FREESP), the normal access checking done in
5248          * VOP_SETATTR would prevent the client from doing it even though
5249          * it should be allowed to do so.  To get around this, we do the
5250          * access checking for ourselves and use VOP_SPACE which doesn't
5251          * do the access checking.
5252          * Also the client should not be allowed to change the file
5253          * size if there is a conflicting non-blocking mandatory lock in
5254          * the region of the change.
5255          */
5256         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5257                 u_offset_t offset;
5258                 ssize_t length;
5259 
5260                 /*
5261                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5262                  * before returning, sarg.vap->va_mask is used to
5263                  * generate the setattr reply bitmap.  We also clear
5264                  * AT_SIZE below before calling VOP_SPACE.  For both
5265                  * of these cases, the va_mask needs to be saved here
5266                  * and restored after calling VOP_SETATTR.
5267                  */
5268                 saved_mask = sarg.vap->va_mask;
5269 
5270                 /*
5271                  * Check any possible conflict due to NBMAND locks.
5272                  * Get into critical region before VOP_GETATTR, so the
5273                  * size attribute is valid when checking conflicts.
5274                  */
5275                 if (nbl_need_check(vp)) {
5276                         nbl_start_crit(vp, RW_READER);
5277                         in_crit = 1;
5278                 }
5279 
5280                 bva.va_mask = AT_UID|AT_SIZE;
5281                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5282                         status = puterrno4(error);
5283                         goto done;
5284                 }
5285 
5286                 if (in_crit) {
5287                         if (sarg.vap->va_size < bva.va_size) {
5288                                 offset = sarg.vap->va_size;
5289                                 length = bva.va_size - sarg.vap->va_size;
5290                         } else {
5291                                 offset = bva.va_size;
5292                                 length = sarg.vap->va_size - bva.va_size;
5293                         }
5294                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5295                             &ct)) {
5296                                 status = NFS4ERR_LOCKED;
5297                                 goto done;
5298                         }
5299                 }
5300 
5301                 if (crgetuid(cr) == bva.va_uid) {
5302                         sarg.vap->va_mask &= ~AT_SIZE;
5303                         bf.l_type = F_WRLCK;
5304                         bf.l_whence = 0;
5305                         bf.l_start = (off64_t)sarg.vap->va_size;
5306                         bf.l_len = 0;
5307                         bf.l_sysid = 0;
5308                         bf.l_pid = 0;
5309                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5310                             (offset_t)sarg.vap->va_size, cr, &ct);
5311                 }
5312         }
5313 
5314         if (!error && sarg.vap->va_mask != 0)
5315                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5316 
5317         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5318         if (saved_mask & AT_SIZE)
5319                 sarg.vap->va_mask |= AT_SIZE;
5320 
5321         /*
5322          * If an ACL was being set, it has been delayed until now,
5323          * in order to set the mode (via the VOP_SETATTR() above) first.
5324          */
5325         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5326                 int i;
5327 
5328                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5329                         if (ntov.amap[i] == FATTR4_ACL)
5330                                 break;
5331                 if (i < NFS4_MAXNUM_ATTRS) {
5332                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5333                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5334                         if (error == 0) {
5335                                 *resp |= FATTR4_ACL_MASK;
5336                         } else if (error == ENOTSUP) {
5337                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5338                                 status = NFS4ERR_ATTRNOTSUPP;
5339                                 goto done;
5340                         }
5341                 } else {
5342                         NFS4_DEBUG(rfs4_debug,
5343                             (CE_NOTE, "do_rfs4_op_setattr: "
5344                             "unable to find ACL in fattr4"));
5345                         error = EINVAL;
5346                 }
5347         }
5348 
5349         if (error) {
5350                 /* check if a monitor detected a delegation conflict */
5351                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5352                         status = NFS4ERR_DELAY;
5353                 else
5354                         status = puterrno4(error);
5355 
5356                 /*
5357                  * Set the response bitmap when setattr failed.
5358                  * If VOP_SETATTR partially succeeded, test by doing a
5359                  * VOP_GETATTR on the object and comparing the data
5360                  * to the setattr arguments.
5361                  */
5362                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5363         } else {
5364                 /*
5365                  * Force modified metadata out to stable storage.
5366                  */
5367                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5368                 /*
5369                  * Set response bitmap
5370                  */
5371                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5372         }
5373 
5374 /* Return early and already have a NFSv4 error */
5375 done:
5376         /*
5377          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5378          * conversion sets both readable and writeable NFS4 attrs
5379          * for AT_MTIME and AT_ATIME.  The line below masks out
5380          * unrequested attrs from the setattr result bitmap.  This
5381          * is placed after the done: label to catch the ATTRNOTSUP
5382          * case.
5383          */
5384         *resp &= fattrp->attrmask;
5385 
5386         if (in_crit)
5387                 nbl_end_crit(vp);
5388 
5389         nfs4_ntov_table_free(&ntov, &sarg);
5390 
5391         return (status);
5392 }
5393 
5394 /* ARGSUSED */
5395 static void
5396 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5397     struct compound_state *cs)
5398 {
5399         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5400         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5401         bslabel_t *clabel;
5402 
5403         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5404             SETATTR4args *, args);
5405 
5406         if (cs->vp == NULL) {
5407                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5408                 goto out;
5409         }
5410 
5411         /*
5412          * If there is an unshared filesystem mounted on this vnode,
5413          * do not allow to setattr on this vnode.
5414          */
5415         if (vn_ismntpt(cs->vp)) {
5416                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5417                 goto out;
5418         }
5419 
5420         resp->attrsset = 0;
5421 
5422         if (rdonly4(req, cs)) {
5423                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5424                 goto out;
5425         }
5426 
5427         /* check label before setting attributes */
5428         if (is_system_labeled()) {
5429                 ASSERT(req->rq_label != NULL);
5430                 clabel = req->rq_label;
5431                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5432                     "got client label from request(1)",
5433                     struct svc_req *, req);
5434                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5435                         if (!do_rfs_label_check(clabel, cs->vp,
5436                             EQUALITY_CHECK, cs->exi)) {
5437                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5438                                 goto out;
5439                         }
5440                 }
5441         }
5442 
5443         *cs->statusp = resp->status =
5444             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5445             &args->stateid);
5446 
5447 out:
5448         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5449             SETATTR4res *, resp);
5450 }
5451 
5452 /* ARGSUSED */
5453 static void
5454 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5455     struct compound_state *cs)
5456 {
5457         /*
5458          * verify and nverify are exactly the same, except that nverify
5459          * succeeds when some argument changed, and verify succeeds when
5460          * when none changed.
5461          */
5462 
5463         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5464         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5465 
5466         int error;
5467         struct nfs4_svgetit_arg sarg;
5468         struct statvfs64 sb;
5469         struct nfs4_ntov_table ntov;
5470 
5471         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5472             VERIFY4args *, args);
5473 
5474         if (cs->vp == NULL) {
5475                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5476                 goto out;
5477         }
5478 
5479         sarg.sbp = &sb;
5480         sarg.is_referral = B_FALSE;
5481         nfs4_ntov_table_init(&ntov);
5482         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5483             &sarg, &ntov, NFS4ATTR_VERIT);
5484         if (resp->status != NFS4_OK) {
5485                 /*
5486                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5487                  * so could return -1 for "no match".
5488                  */
5489                 if (resp->status == -1)
5490                         resp->status = NFS4ERR_NOT_SAME;
5491                 goto done;
5492         }
5493         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5494         switch (error) {
5495         case 0:
5496                 resp->status = NFS4_OK;
5497                 break;
5498         case -1:
5499                 resp->status = NFS4ERR_NOT_SAME;
5500                 break;
5501         default:
5502                 resp->status = puterrno4(error);
5503                 break;
5504         }
5505 done:
5506         *cs->statusp = resp->status;
5507         nfs4_ntov_table_free(&ntov, &sarg);
5508 out:
5509         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5510             VERIFY4res *, resp);
5511 }
5512 
5513 /* ARGSUSED */
5514 static void
5515 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5516     struct compound_state *cs)
5517 {
5518         /*
5519          * verify and nverify are exactly the same, except that nverify
5520          * succeeds when some argument changed, and verify succeeds when
5521          * when none changed.
5522          */
5523 
5524         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5525         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5526 
5527         int error;
5528         struct nfs4_svgetit_arg sarg;
5529         struct statvfs64 sb;
5530         struct nfs4_ntov_table ntov;
5531 
5532         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5533             NVERIFY4args *, args);
5534 
5535         if (cs->vp == NULL) {
5536                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5537                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5538                     NVERIFY4res *, resp);
5539                 return;
5540         }
5541         sarg.sbp = &sb;
5542         sarg.is_referral = B_FALSE;
5543         nfs4_ntov_table_init(&ntov);
5544         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5545             &sarg, &ntov, NFS4ATTR_VERIT);
5546         if (resp->status != NFS4_OK) {
5547                 /*
5548                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5549                  * so could return -1 for "no match".
5550                  */
5551                 if (resp->status == -1)
5552                         resp->status = NFS4_OK;
5553                 goto done;
5554         }
5555         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5556         switch (error) {
5557         case 0:
5558                 resp->status = NFS4ERR_SAME;
5559                 break;
5560         case -1:
5561                 resp->status = NFS4_OK;
5562                 break;
5563         default:
5564                 resp->status = puterrno4(error);
5565                 break;
5566         }
5567 done:
5568         *cs->statusp = resp->status;
5569         nfs4_ntov_table_free(&ntov, &sarg);
5570 
5571         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5572             NVERIFY4res *, resp);
5573 }
5574 
/*
 * Number of iovec entries rfs4_op_write() keeps on its stack; a write
 * whose mblk chain needs more entries than this gets a temporary array
 * from kmem_alloc() instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define	MAX_IOVECS	12
5579 
5580 /* ARGSUSED */
5581 static void
5582 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5583     struct compound_state *cs)
5584 {
5585         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5586         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5587         int error;
5588         vnode_t *vp;
5589         struct vattr bva;
5590         u_offset_t rlimit;
5591         struct uio uio;
5592         struct iovec iov[MAX_IOVECS];
5593         struct iovec *iovp;
5594         int iovcnt;
5595         int ioflag;
5596         cred_t *savecred, *cr;
5597         bool_t *deleg = &cs->deleg;
5598         nfsstat4 stat;
5599         int in_crit = 0;
5600         caller_context_t ct;
5601         nfs4_srv_t *nsrv4;
5602 
5603         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5604             WRITE4args *, args);
5605 
5606         vp = cs->vp;
5607         if (vp == NULL) {
5608                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5609                 goto out;
5610         }
5611         if (cs->access == CS_ACCESS_DENIED) {
5612                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5613                 goto out;
5614         }
5615 
5616         cr = cs->cr;
5617 
5618         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5619             deleg, TRUE, &ct)) != NFS4_OK) {
5620                 *cs->statusp = resp->status = stat;
5621                 goto out;
5622         }
5623 
5624         /*
5625          * We have to enter the critical region before calling VOP_RWLOCK
5626          * to avoid a deadlock with ufs.
5627          */
5628         if (nbl_need_check(vp)) {
5629                 nbl_start_crit(vp, RW_READER);
5630                 in_crit = 1;
5631                 if (nbl_conflict(vp, NBL_WRITE,
5632                     args->offset, args->data_len, 0, &ct)) {
5633                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5634                         goto out;
5635                 }
5636         }
5637 
5638         bva.va_mask = AT_MODE | AT_UID;
5639         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5640 
5641         /*
5642          * If we can't get the attributes, then we can't do the
5643          * right access checking.  So, we'll fail the request.
5644          */
5645         if (error) {
5646                 *cs->statusp = resp->status = puterrno4(error);
5647                 goto out;
5648         }
5649 
5650         if (rdonly4(req, cs)) {
5651                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5652                 goto out;
5653         }
5654 
5655         if (vp->v_type != VREG) {
5656                 *cs->statusp = resp->status =
5657                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5658                 goto out;
5659         }
5660 
5661         if (crgetuid(cr) != bva.va_uid &&
5662             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5663                 *cs->statusp = resp->status = puterrno4(error);
5664                 goto out;
5665         }
5666 
5667         if (MANDLOCK(vp, bva.va_mode)) {
5668                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5669                 goto out;
5670         }
5671 
5672         nsrv4 = nfs4_get_srv();
5673         if (args->data_len == 0) {
5674                 *cs->statusp = resp->status = NFS4_OK;
5675                 resp->count = 0;
5676                 resp->committed = args->stable;
5677                 resp->writeverf = nsrv4->write4verf;
5678                 goto out;
5679         }
5680 
5681         if (args->mblk != NULL) {
5682                 mblk_t *m;
5683                 uint_t bytes, round_len;
5684 
5685                 iovcnt = 0;
5686                 bytes = 0;
5687                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5688                 for (m = args->mblk;
5689                     m != NULL && bytes < round_len;
5690                     m = m->b_cont) {
5691                         iovcnt++;
5692                         bytes += MBLKL(m);
5693                 }
5694 #ifdef DEBUG
5695                 /* should have ended on an mblk boundary */
5696                 if (bytes != round_len) {
5697                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5698                             bytes, round_len, args->data_len);
5699                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5700                             (void *)args->mblk, (void *)m);
5701                         ASSERT(bytes == round_len);
5702                 }
5703 #endif
5704                 if (iovcnt <= MAX_IOVECS) {
5705                         iovp = iov;
5706                 } else {
5707                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5708                 }
5709                 mblk_to_iov(args->mblk, iovcnt, iovp);
5710         } else if (args->rlist != NULL) {
5711                 iovcnt = 1;
5712                 iovp = iov;
5713                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5714                 iovp->iov_len = args->data_len;
5715         } else {
5716                 iovcnt = 1;
5717                 iovp = iov;
5718                 iovp->iov_base = args->data_val;
5719                 iovp->iov_len = args->data_len;
5720         }
5721 
5722         uio.uio_iov = iovp;
5723         uio.uio_iovcnt = iovcnt;
5724 
5725         uio.uio_segflg = UIO_SYSSPACE;
5726         uio.uio_extflg = UIO_COPY_DEFAULT;
5727         uio.uio_loffset = args->offset;
5728         uio.uio_resid = args->data_len;
5729         uio.uio_llimit = curproc->p_fsz_ctl;
5730         rlimit = uio.uio_llimit - args->offset;
5731         if (rlimit < (u_offset_t)uio.uio_resid)
5732                 uio.uio_resid = (int)rlimit;
5733 
5734         if (args->stable == UNSTABLE4)
5735                 ioflag = 0;
5736         else if (args->stable == FILE_SYNC4)
5737                 ioflag = FSYNC;
5738         else if (args->stable == DATA_SYNC4)
5739                 ioflag = FDSYNC;
5740         else {
5741                 if (iovp != iov)
5742                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5743                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5744                 goto out;
5745         }
5746 
5747         /*
5748          * We're changing creds because VM may fault and we need
5749          * the cred of the current thread to be used if quota
5750          * checking is enabled.
5751          */
5752         savecred = curthread->t_cred;
5753         curthread->t_cred = cr;
5754         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5755         curthread->t_cred = savecred;
5756 
5757         if (iovp != iov)
5758                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5759 
5760         if (error) {
5761                 *cs->statusp = resp->status = puterrno4(error);
5762                 goto out;
5763         }
5764 
5765         *cs->statusp = resp->status = NFS4_OK;
5766         resp->count = args->data_len - uio.uio_resid;
5767 
5768         if (ioflag == 0)
5769                 resp->committed = UNSTABLE4;
5770         else
5771                 resp->committed = FILE_SYNC4;
5772 
5773         resp->writeverf = nsrv4->write4verf;
5774 
5775 out:
5776         if (in_crit)
5777                 nbl_end_crit(vp);
5778 
5779         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5780             WRITE4res *, resp);
5781 }
5782 
5783 
5784 /* XXX put in a header file */
5785 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5786 
5787 void
5788 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5789     struct svc_req *req, cred_t *cr, int *rv)
5790 {
5791         uint_t i;
5792         struct compound_state cs;
5793         nfs4_srv_t *nsrv4;
5794         nfs_export_t *ne = nfs_get_export();
5795 
5796         if (rv != NULL)
5797                 *rv = 0;
5798         rfs4_init_compound_state(&cs);
5799         /*
5800          * Form a reply tag by copying over the reqeuest tag.
5801          */
5802         resp->tag.utf8string_val =
5803             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5804         resp->tag.utf8string_len = args->tag.utf8string_len;
5805         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5806             resp->tag.utf8string_len);
5807 
5808         cs.statusp = &resp->status;
5809         cs.req = req;
5810         resp->array = NULL;
5811         resp->array_len = 0;
5812 
5813         /*
5814          * XXX for now, minorversion should be zero
5815          */
5816         if (args->minorversion != NFS4_MINORVERSION) {
5817                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5818                     &cs, COMPOUND4args *, args);
5819                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5820                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5821                     &cs, COMPOUND4res *, resp);
5822                 return;
5823         }
5824 
5825         if (args->array_len == 0) {
5826                 resp->status = NFS4_OK;
5827                 return;
5828         }
5829 
5830         ASSERT(exi == NULL);
5831         ASSERT(cr == NULL);
5832 
5833         cr = crget();
5834         ASSERT(cr != NULL);
5835 
5836         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5837                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5838                     &cs, COMPOUND4args *, args);
5839                 crfree(cr);
5840                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5841                     &cs, COMPOUND4res *, resp);
5842                 svcerr_badcred(req->rq_xprt);
5843                 if (rv != NULL)
5844                         *rv = 1;
5845                 return;
5846         }
5847         resp->array_len = args->array_len;
5848         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5849             KM_SLEEP);
5850 
5851         cs.basecr = cr;
5852         nsrv4 = nfs4_get_srv();
5853 
5854         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5855             COMPOUND4args *, args);
5856 
5857         /*
5858          * For now, NFS4 compound processing must be protected by
5859          * exported_lock because it can access more than one exportinfo
5860          * per compound and share/unshare can now change multiple
5861          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5862          * per proc (excluding public exinfo), and exi_count design
5863          * is sufficient to protect concurrent execution of NFS2/3
5864          * ops along with unexport.  This lock will be removed as
5865          * part of the NFSv4 phase 2 namespace redesign work.
5866          */
5867         rw_enter(&ne->exported_lock, RW_READER);
5868 
5869         /*
5870          * If this is the first compound we've seen, we need to start all
5871          * new instances' grace periods.
5872          */
5873         if (nsrv4->seen_first_compound == 0) {
5874                 rfs4_grace_start_new(nsrv4);
5875                 /*
5876                  * This must be set after rfs4_grace_start_new(), otherwise
5877                  * another thread could proceed past here before the former
5878                  * is finished.
5879                  */
5880                 nsrv4->seen_first_compound = 1;
5881         }
5882 
5883         for (i = 0; i < args->array_len && cs.cont; i++) {
5884                 nfs_argop4 *argop;
5885                 nfs_resop4 *resop;
5886                 uint_t op;
5887 
5888                 argop = &args->array[i];
5889                 resop = &resp->array[i];
5890                 resop->resop = argop->argop;
5891                 op = (uint_t)resop->resop;
5892 
5893                 if (op < rfsv4disp_cnt) {
5894                         /*
5895                          * Count the individual ops here; NULL and COMPOUND
5896                          * are counted in common_dispatch()
5897                          */
5898                         rfsproccnt_v4_ptr[op].value.ui64++;
5899 
5900                         NFS4_DEBUG(rfs4_debug > 1,
5901                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5902                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5903                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5904                             rfs4_op_string[op], *cs.statusp));
5905                         if (*cs.statusp != NFS4_OK)
5906                                 cs.cont = FALSE;
5907                 } else {
5908                         /*
5909                          * This is effectively dead code since XDR code
5910                          * will have already returned BADXDR if op doesn't
5911                          * decode to legal value.  This only done for a
5912                          * day when XDR code doesn't verify v4 opcodes.
5913                          */
5914                         op = OP_ILLEGAL;
5915                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5916 
5917                         rfs4_op_illegal(argop, resop, req, &cs);
5918                         cs.cont = FALSE;
5919                 }
5920 
5921                 /*
5922                  * If not at last op, and if we are to stop, then
5923                  * compact the results array.
5924                  */
5925                 if ((i + 1) < args->array_len && !cs.cont) {
5926                         nfs_resop4 *new_res = kmem_alloc(
5927                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5928                         bcopy(resp->array,
5929                             new_res, (i+1) * sizeof (nfs_resop4));
5930                         kmem_free(resp->array,
5931                             args->array_len * sizeof (nfs_resop4));
5932 
5933                         resp->array_len =  i + 1;
5934                         resp->array = new_res;
5935                 }
5936         }
5937 
5938         rw_exit(&ne->exported_lock);
5939 
5940         /*
5941          * clear exportinfo and vnode fields from compound_state before dtrace
5942          * probe, to avoid tracing residual values for path and share path.
5943          */
5944         if (cs.vp)
5945                 VN_RELE(cs.vp);
5946         if (cs.saved_vp)
5947                 VN_RELE(cs.saved_vp);
5948         cs.exi = cs.saved_exi = NULL;
5949         cs.vp = cs.saved_vp = NULL;
5950 
5951         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5952             COMPOUND4res *, resp);
5953 
5954         if (cs.saved_fh.nfs_fh4_val)
5955                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5956 
5957         if (cs.basecr)
5958                 crfree(cs.basecr);
5959         if (cs.cr)
5960                 crfree(cs.cr);
5961         /*
5962          * done with this compound request, free the label
5963          */
5964 
5965         if (req->rq_label != NULL) {
5966                 kmem_free(req->rq_label, sizeof (bslabel_t));
5967                 req->rq_label = NULL;
5968         }
5969 }
5970 
5971 /*
5972  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5973  * XXX zero out the tag and array values. Need to investigate why the
5974  * XXX calls occur, but at least prevent the panic for now.
5975  */
5976 void
5977 rfs4_compound_free(COMPOUND4res *resp)
5978 {
5979         uint_t i;
5980 
5981         if (resp->tag.utf8string_val) {
5982                 UTF8STRING_FREE(resp->tag)
5983         }
5984 
5985         for (i = 0; i < resp->array_len; i++) {
5986                 nfs_resop4 *resop;
5987                 uint_t op;
5988 
5989                 resop = &resp->array[i];
5990                 op = (uint_t)resop->resop;
5991                 if (op < rfsv4disp_cnt) {
5992                         (*rfsv4disptab[op].dis_resfree)(resop);
5993                 }
5994         }
5995         if (resp->array != NULL) {
5996                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5997         }
5998 }
5999 
6000 /*
6001  * Process the value of the compound request rpc flags, as a bit-AND
6002  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6003  */
6004 void
6005 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6006 {
6007         int i;
6008         int flag = RPC_ALL;
6009 
6010         for (i = 0; flag && i < args->array_len; i++) {
6011                 uint_t op;
6012 
6013                 op = (uint_t)args->array[i].argop;
6014 
6015                 if (op < rfsv4disp_cnt)
6016                         flag &= rfsv4disptab[op].dis_flags;
6017                 else
6018                         flag = 0;
6019         }
6020         *flagp = flag;
6021 }
6022 
6023 nfsstat4
6024 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6025 {
6026         nfsstat4 e;
6027 
6028         rfs4_dbe_lock(cp->rc_dbe);
6029 
6030         if (cp->rc_sysidt != LM_NOSYSID) {
6031                 *sp = cp->rc_sysidt;
6032                 e = NFS4_OK;
6033 
6034         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6035                 *sp = cp->rc_sysidt;
6036                 e = NFS4_OK;
6037 
6038                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6039                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6040         } else
6041                 e = NFS4ERR_DELAY;
6042 
6043         rfs4_dbe_unlock(cp->rc_dbe);
6044         return (e);
6045 }
6046 
#if defined(DEBUG) && ! defined(lint)
/*
 * lock_print
 *
 * Debug helper: log one record-lock request with cmn_err(CE_NOTE).
 *
 *   str       - caller-supplied prefix for the message.
 *   operation - lock command; F_GETLK, F_SETLK and F_SETLK_NBMAND are
 *               printed by name, anything else as "F_UNKNOWN".
 *   flk       - the lock description; l_whence is asserted to be 0.  A
 *               zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        const char *op;
        const char *type;

        switch (operation) {
        case F_GETLK:
                op = "F_GETLK";
                break;
        case F_SETLK:
                op = "F_SETLK";
                break;
        case F_SETLK_NBMAND:
                op = "F_SETLK_NBMAND";
                break;
        default:
                op = "F_UNKNOWN";
                break;
        }

        switch (flk->l_type) {
        case F_UNLCK:
                type = "F_UNLCK";
                break;
        case F_RDLCK:
                type = "F_RDLCK";
                break;
        case F_WRLCK:
                type = "F_WRLCK";
                break;
        default:
                type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * LOCK_PRINT(d, s, t, f): call lock_print(s, t, f) when d is non-zero.
 * Wrapped in do/while (0) so that the expansion is a single statement and
 * cannot capture an "else" that follows the macro invocation.
 */
#define LOCK_PRINT(d, s, t, f) \
        do { \
                if (d) \
                        lock_print((s), (t), (f)); \
        } while (0)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6083 
6084 /*ARGSUSED*/
6085 static bool_t
6086 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6087 {
6088         return (TRUE);
6089 }
6090 
6091 /*
6092  * Look up the pathname using the vp in cs as the directory vnode.
6093  * cs->vp will be the vnode for the file on success
6094  */
6095 
6096 static nfsstat4
6097 rfs4_lookup(component4 *component, struct svc_req *req,
6098     struct compound_state *cs)
6099 {
6100         char *nm;
6101         uint32_t len;
6102         nfsstat4 status;
6103         struct sockaddr *ca;
6104         char *name;
6105 
6106         if (cs->vp == NULL) {
6107                 return (NFS4ERR_NOFILEHANDLE);
6108         }
6109         if (cs->vp->v_type != VDIR) {
6110                 return (NFS4ERR_NOTDIR);
6111         }
6112 
6113         status = utf8_dir_verify(component);
6114         if (status != NFS4_OK)
6115                 return (status);
6116 
6117         nm = utf8_to_fn(component, &len, NULL);
6118         if (nm == NULL) {
6119                 return (NFS4ERR_INVAL);
6120         }
6121 
6122         if (len > MAXNAMELEN) {
6123                 kmem_free(nm, len);
6124                 return (NFS4ERR_NAMETOOLONG);
6125         }
6126 
6127         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6128         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6129             MAXPATHLEN + 1);
6130 
6131         if (name == NULL) {
6132                 kmem_free(nm, len);
6133                 return (NFS4ERR_INVAL);
6134         }
6135 
6136         status = do_rfs4_op_lookup(name, req, cs);
6137 
6138         if (name != nm)
6139                 kmem_free(name, MAXPATHLEN + 1);
6140 
6141         kmem_free(nm, len);
6142 
6143         return (status);
6144 }
6145 
6146 static nfsstat4
6147 rfs4_lookupfile(component4 *component, struct svc_req *req,
6148     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6149 {
6150         nfsstat4 status;
6151         vnode_t *dvp = cs->vp;
6152         vattr_t bva, ava, fva;
6153         int error;
6154 
6155         /* Get "before" change value */
6156         bva.va_mask = AT_CTIME|AT_SEQ;
6157         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6158         if (error)
6159                 return (puterrno4(error));
6160 
6161         /* rfs4_lookup may VN_RELE directory */
6162         VN_HOLD(dvp);
6163 
6164         status = rfs4_lookup(component, req, cs);
6165         if (status != NFS4_OK) {
6166                 VN_RELE(dvp);
6167                 return (status);
6168         }
6169 
6170         /*
6171          * Get "after" change value, if it fails, simply return the
6172          * before value.
6173          */
6174         ava.va_mask = AT_CTIME|AT_SEQ;
6175         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6176                 ava.va_ctime = bva.va_ctime;
6177                 ava.va_seq = 0;
6178         }
6179         VN_RELE(dvp);
6180 
6181         /*
6182          * Validate the file is a file
6183          */
6184         fva.va_mask = AT_TYPE|AT_MODE;
6185         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6186         if (error)
6187                 return (puterrno4(error));
6188 
6189         if (fva.va_type != VREG) {
6190                 if (fva.va_type == VDIR)
6191                         return (NFS4ERR_ISDIR);
6192                 if (fva.va_type == VLNK)
6193                         return (NFS4ERR_SYMLINK);
6194                 return (NFS4ERR_INVAL);
6195         }
6196 
6197         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6198         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6199 
6200         /*
6201          * It is undefined if VOP_LOOKUP will change va_seq, so
6202          * cinfo.atomic = TRUE only if we have
6203          * non-zero va_seq's, and they have not changed.
6204          */
6205         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6206                 cinfo->atomic = TRUE;
6207         else
6208                 cinfo->atomic = FALSE;
6209 
6210         /* Check for mandatory locking */
6211         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6212         return (check_open_access(access, cs, req));
6213 }
6214 
6215 static nfsstat4
6216 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6217     cred_t *cr, vnode_t **vpp, bool_t *created)
6218 {
6219         int error;
6220         nfsstat4 status = NFS4_OK;
6221         vattr_t va;
6222 
6223 tryagain:
6224 
6225         /*
6226          * The file open mode used is VWRITE.  If the client needs
6227          * some other semantic, then it should do the access checking
6228          * itself.  It would have been nice to have the file open mode
6229          * passed as part of the arguments.
6230          */
6231 
6232         *created = TRUE;
6233         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6234 
6235         if (error) {
6236                 *created = FALSE;
6237 
6238                 /*
6239                  * If we got something other than file already exists
6240                  * then just return this error.  Otherwise, we got
6241                  * EEXIST.  If we were doing a GUARDED create, then
6242                  * just return this error.  Otherwise, we need to
6243                  * make sure that this wasn't a duplicate of an
6244                  * exclusive create request.
6245                  *
6246                  * The assumption is made that a non-exclusive create
6247                  * request will never return EEXIST.
6248                  */
6249 
6250                 if (error != EEXIST || mode == GUARDED4) {
6251                         status = puterrno4(error);
6252                         return (status);
6253                 }
6254                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6255                     NULL, NULL, NULL);
6256 
6257                 if (error) {
6258                         /*
6259                          * We couldn't find the file that we thought that
6260                          * we just created.  So, we'll just try creating
6261                          * it again.
6262                          */
6263                         if (error == ENOENT)
6264                                 goto tryagain;
6265 
6266                         status = puterrno4(error);
6267                         return (status);
6268                 }
6269 
6270                 if (mode == UNCHECKED4) {
6271                         /* existing object must be regular file */
6272                         if ((*vpp)->v_type != VREG) {
6273                                 if ((*vpp)->v_type == VDIR)
6274                                         status = NFS4ERR_ISDIR;
6275                                 else if ((*vpp)->v_type == VLNK)
6276                                         status = NFS4ERR_SYMLINK;
6277                                 else
6278                                         status = NFS4ERR_INVAL;
6279                                 VN_RELE(*vpp);
6280                                 return (status);
6281                         }
6282 
6283                         return (NFS4_OK);
6284                 }
6285 
6286                 /* Check for duplicate request */
6287                 va.va_mask = AT_MTIME;
6288                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6289                 if (!error) {
6290                         /* We found the file */
6291                         const timestruc_t *mtime = &vap->va_mtime;
6292 
6293                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6294                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6295                                 /* but its not our creation */
6296                                 VN_RELE(*vpp);
6297                                 return (NFS4ERR_EXIST);
6298                         }
6299                         *created = TRUE; /* retrans of create == created */
6300                         return (NFS4_OK);
6301                 }
6302                 VN_RELE(*vpp);
6303                 return (NFS4ERR_EXIST);
6304         }
6305 
6306         return (NFS4_OK);
6307 }
6308 
6309 static nfsstat4
6310 check_open_access(uint32_t access, struct compound_state *cs,
6311     struct svc_req *req)
6312 {
6313         int error;
6314         vnode_t *vp;
6315         bool_t readonly;
6316         cred_t *cr = cs->cr;
6317 
6318         /* For now we don't allow mandatory locking as per V2/V3 */
6319         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6320                 return (NFS4ERR_ACCESS);
6321         }
6322 
6323         vp = cs->vp;
6324         ASSERT(cr != NULL && vp->v_type == VREG);
6325 
6326         /*
6327          * If the file system is exported read only and we are trying
6328          * to open for write, then return NFS4ERR_ROFS
6329          */
6330 
6331         readonly = rdonly4(req, cs);
6332 
6333         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6334                 return (NFS4ERR_ROFS);
6335 
6336         if (access & OPEN4_SHARE_ACCESS_READ) {
6337                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6338                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6339                         return (NFS4ERR_ACCESS);
6340                 }
6341         }
6342 
6343         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6344                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6345                 if (error)
6346                         return (NFS4ERR_ACCESS);
6347         }
6348 
6349         return (NFS4_OK);
6350 }
6351 
6352 static nfsstat4
6353 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6354     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6355 {
6356         struct nfs4_svgetit_arg sarg;
6357         struct nfs4_ntov_table ntov;
6358 
6359         bool_t ntov_table_init = FALSE;
6360         struct statvfs64 sb;
6361         nfsstat4 status;
6362         vnode_t *vp;
6363         vattr_t bva, ava, iva, cva, *vap;
6364         vnode_t *dvp;
6365         timespec32_t *mtime;
6366         char *nm = NULL;
6367         uint_t buflen;
6368         bool_t created;
6369         bool_t setsize = FALSE;
6370         len_t reqsize;
6371         int error;
6372         bool_t trunc;
6373         caller_context_t ct;
6374         component4 *component;
6375         bslabel_t *clabel;
6376         struct sockaddr *ca;
6377         char *name = NULL;
6378 
6379         sarg.sbp = &sb;
6380         sarg.is_referral = B_FALSE;
6381 
6382         dvp = cs->vp;
6383 
6384         /* Check if the file system is read only */
6385         if (rdonly4(req, cs))
6386                 return (NFS4ERR_ROFS);
6387 
6388         /* check the label of including directory */
6389         if (is_system_labeled()) {
6390                 ASSERT(req->rq_label != NULL);
6391                 clabel = req->rq_label;
6392                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6393                     "got client label from request(1)",
6394                     struct svc_req *, req);
6395                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6396                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6397                             cs->exi)) {
6398                                 return (NFS4ERR_ACCESS);
6399                         }
6400                 }
6401         }
6402 
6403         /*
6404          * Get the last component of path name in nm. cs will reference
6405          * the including directory on success.
6406          */
6407         component = &args->open_claim4_u.file;
6408         status = utf8_dir_verify(component);
6409         if (status != NFS4_OK)
6410                 return (status);
6411 
6412         nm = utf8_to_fn(component, &buflen, NULL);
6413 
6414         if (nm == NULL)
6415                 return (NFS4ERR_RESOURCE);
6416 
6417         if (buflen > MAXNAMELEN) {
6418                 kmem_free(nm, buflen);
6419                 return (NFS4ERR_NAMETOOLONG);
6420         }
6421 
6422         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6423         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6424         if (error) {
6425                 kmem_free(nm, buflen);
6426                 return (puterrno4(error));
6427         }
6428 
6429         if (bva.va_type != VDIR) {
6430                 kmem_free(nm, buflen);
6431                 return (NFS4ERR_NOTDIR);
6432         }
6433 
6434         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6435 
6436         switch (args->mode) {
6437         case GUARDED4:
6438                 /*FALLTHROUGH*/
6439         case UNCHECKED4:
6440                 nfs4_ntov_table_init(&ntov);
6441                 ntov_table_init = TRUE;
6442 
6443                 *attrset = 0;
6444                 status = do_rfs4_set_attrs(attrset,
6445                     &args->createhow4_u.createattrs,
6446                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6447 
6448                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6449                     sarg.vap->va_type != VREG) {
6450                         if (sarg.vap->va_type == VDIR)
6451                                 status = NFS4ERR_ISDIR;
6452                         else if (sarg.vap->va_type == VLNK)
6453                                 status = NFS4ERR_SYMLINK;
6454                         else
6455                                 status = NFS4ERR_INVAL;
6456                 }
6457 
6458                 if (status != NFS4_OK) {
6459                         kmem_free(nm, buflen);
6460                         nfs4_ntov_table_free(&ntov, &sarg);
6461                         *attrset = 0;
6462                         return (status);
6463                 }
6464 
6465                 vap = sarg.vap;
6466                 vap->va_type = VREG;
6467                 vap->va_mask |= AT_TYPE;
6468 
6469                 if ((vap->va_mask & AT_MODE) == 0) {
6470                         vap->va_mask |= AT_MODE;
6471                         vap->va_mode = (mode_t)0600;
6472                 }
6473 
6474                 if (vap->va_mask & AT_SIZE) {
6475 
6476                         /* Disallow create with a non-zero size */
6477 
6478                         if ((reqsize = sarg.vap->va_size) != 0) {
6479                                 kmem_free(nm, buflen);
6480                                 nfs4_ntov_table_free(&ntov, &sarg);
6481                                 *attrset = 0;
6482                                 return (NFS4ERR_INVAL);
6483                         }
6484                         setsize = TRUE;
6485                 }
6486                 break;
6487 
6488         case EXCLUSIVE4:
6489                 /* prohibit EXCL create of named attributes */
6490                 if (dvp->v_flag & V_XATTRDIR) {
6491                         kmem_free(nm, buflen);
6492                         *attrset = 0;
6493                         return (NFS4ERR_INVAL);
6494                 }
6495 
6496                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6497                 cva.va_type = VREG;
6498                 /*
6499                  * Ensure no time overflows. Assumes underlying
6500                  * filesystem supports at least 32 bits.
6501                  * Truncate nsec to usec resolution to allow valid
6502                  * compares even if the underlying filesystem truncates.
6503                  */
6504                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6505                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6506                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6507                 cva.va_mode = (mode_t)0;
6508                 vap = &cva;
6509 
6510                 /*
6511                  * For EXCL create, attrset is set to the server attr
6512                  * used to cache the client's verifier.
6513                  */
6514                 *attrset = FATTR4_TIME_MODIFY_MASK;
6515                 break;
6516         }
6517 
6518         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6519         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6520             MAXPATHLEN  + 1);
6521 
6522         if (name == NULL) {
6523                 kmem_free(nm, buflen);
6524                 return (NFS4ERR_SERVERFAULT);
6525         }
6526 
6527         status = create_vnode(dvp, name, vap, args->mode,
6528             cs->cr, &vp, &created);
6529         if (nm != name)
6530                 kmem_free(name, MAXPATHLEN + 1);
6531         kmem_free(nm, buflen);
6532 
6533         if (status != NFS4_OK) {
6534                 if (ntov_table_init)
6535                         nfs4_ntov_table_free(&ntov, &sarg);
6536                 *attrset = 0;
6537                 return (status);
6538         }
6539 
6540         trunc = (setsize && !created);
6541 
6542         if (args->mode != EXCLUSIVE4) {
6543                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6544 
6545                 /*
6546                  * True verification that object was created with correct
6547                  * attrs is impossible.  The attrs could have been changed
6548                  * immediately after object creation.  If attributes did
6549                  * not verify, the only recourse for the server is to
6550                  * destroy the object.  Maybe if some attrs (like gid)
6551                  * are set incorrectly, the object should be destroyed;
6552                  * however, seems bad as a default policy.  Do we really
6553                  * want to destroy an object over one of the times not
6554                  * verifying correctly?  For these reasons, the server
6555                  * currently sets bits in attrset for createattrs
6556                  * that were set; however, no verification is done.
6557                  *
6558                  * vmask_to_nmask accounts for vattr bits set on create
6559                  *      [do_rfs4_set_attrs() only sets resp bits for
6560                  *       non-vattr/vfs bits.]
6561                  * Mask off any bits we set by default so as not to return
6562                  * more attrset bits than were requested in createattrs
6563                  */
6564                 if (created) {
6565                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6566                         *attrset &= createmask;
6567                 } else {
6568                         /*
6569                          * We did not create the vnode (we tried but it
6570                          * already existed).  In this case, the only createattr
6571                          * that the spec allows the server to set is size,
6572                          * and even then, it can only be set if it is 0.
6573                          */
6574                         *attrset = 0;
6575                         if (trunc)
6576                                 *attrset = FATTR4_SIZE_MASK;
6577                 }
6578         }
6579         if (ntov_table_init)
6580                 nfs4_ntov_table_free(&ntov, &sarg);
6581 
6582         /*
6583          * Get the initial "after" sequence number, if it fails,
6584          * set to zero, time to before.
6585          */
6586         iva.va_mask = AT_CTIME|AT_SEQ;
6587         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6588                 iva.va_seq = 0;
6589                 iva.va_ctime = bva.va_ctime;
6590         }
6591 
6592         /*
6593          * create_vnode attempts to create the file exclusive,
6594          * if it already exists the VOP_CREATE will fail and
6595          * may not increase va_seq. It is atomic if
6596          * we haven't changed the directory, but if it has changed
6597          * we don't know what changed it.
6598          */
6599         if (!created) {
6600                 if (bva.va_seq && iva.va_seq &&
6601                     bva.va_seq == iva.va_seq)
6602                         cinfo->atomic = TRUE;
6603                 else
6604                         cinfo->atomic = FALSE;
6605                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6606         } else {
6607                 /*
6608                  * The entry was created, we need to sync the
6609                  * directory metadata.
6610                  */
6611                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6612 
6613                 /*
6614                  * Get "after" change value, if it fails, simply return the
6615                  * before value.
6616                  */
6617                 ava.va_mask = AT_CTIME|AT_SEQ;
6618                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6619                         ava.va_ctime = bva.va_ctime;
6620                         ava.va_seq = 0;
6621                 }
6622 
6623                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6624 
6625                 /*
6626                  * The cinfo->atomic = TRUE only if we have
6627                  * non-zero va_seq's, and it has incremented by exactly one
6628                  * during the create_vnode and it didn't
6629                  * change during the VOP_FSYNC.
6630                  */
6631                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6632                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6633                         cinfo->atomic = TRUE;
6634                 else
6635                         cinfo->atomic = FALSE;
6636         }
6637 
6638         /* Check for mandatory locking and that the size gets set. */
6639         cva.va_mask = AT_MODE;
6640         if (setsize)
6641                 cva.va_mask |= AT_SIZE;
6642 
6643         /* Assume the worst */
6644         cs->mandlock = TRUE;
6645 
6646         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6647                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6648 
6649                 /*
6650                  * Truncate the file if necessary; this would be
6651                  * the case for create over an existing file.
6652                  */
6653 
6654                 if (trunc) {
6655                         int in_crit = 0;
6656                         rfs4_file_t *fp;
6657                         nfs4_srv_t *nsrv4;
6658                         bool_t create = FALSE;
6659 
6660                         /*
6661                          * We are writing over an existing file.
6662                          * Check to see if we need to recall a delegation.
6663                          */
6664                         nsrv4 = nfs4_get_srv();
6665                         rfs4_hold_deleg_policy(nsrv4);
6666                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6667                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6668                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6669                                         rfs4_file_rele(fp);
6670                                         rfs4_rele_deleg_policy(nsrv4);
6671                                         VN_RELE(vp);
6672                                         *attrset = 0;
6673                                         return (NFS4ERR_DELAY);
6674                                 }
6675                                 rfs4_file_rele(fp);
6676                         }
6677                         rfs4_rele_deleg_policy(nsrv4);
6678 
6679                         if (nbl_need_check(vp)) {
6680                                 in_crit = 1;
6681 
6682                                 ASSERT(reqsize == 0);
6683 
6684                                 nbl_start_crit(vp, RW_READER);
6685                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6686                                     cva.va_size, 0, NULL)) {
6687                                         in_crit = 0;
6688                                         nbl_end_crit(vp);
6689                                         VN_RELE(vp);
6690                                         *attrset = 0;
6691                                         return (NFS4ERR_ACCESS);
6692                                 }
6693                         }
6694                         ct.cc_sysid = 0;
6695                         ct.cc_pid = 0;
6696                         ct.cc_caller_id = nfs4_srv_caller_id;
6697                         ct.cc_flags = CC_DONTBLOCK;
6698 
6699                         cva.va_mask = AT_SIZE;
6700                         cva.va_size = reqsize;
6701                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6702                         if (in_crit)
6703                                 nbl_end_crit(vp);
6704                 }
6705         }
6706 
6707         error = makefh4(&cs->fh, vp, cs->exi);
6708 
6709         /*
6710          * Force modified data and metadata out to stable storage.
6711          */
6712         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6713 
6714         if (error) {
6715                 VN_RELE(vp);
6716                 *attrset = 0;
6717                 return (puterrno4(error));
6718         }
6719 
6720         /* if parent dir is attrdir, set namedattr fh flag */
6721         if (dvp->v_flag & V_XATTRDIR)
6722                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6723 
6724         if (cs->vp)
6725                 VN_RELE(cs->vp);
6726 
6727         cs->vp = vp;
6728 
6729         /*
6730          * if we did not create the file, we will need to check
6731          * the access bits on the file
6732          */
6733 
6734         if (!created) {
6735                 if (setsize)
6736                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6737                 status = check_open_access(args->share_access, cs, req);
6738                 if (status != NFS4_OK)
6739                         *attrset = 0;
6740         }
6741         return (status);
6742 }
6743 
6744 /*ARGSUSED*/
6745 static void
6746 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6747     rfs4_openowner_t *oo, delegreq_t deleg,
6748     uint32_t access, uint32_t deny,
6749     OPEN4res *resp, int deleg_cur)
6750 {
6751         /* XXX Currently not using req  */
6752         rfs4_state_t *sp;
6753         rfs4_file_t *fp;
6754         bool_t screate = TRUE;
6755         bool_t fcreate = TRUE;
6756         uint32_t open_a, share_a;
6757         uint32_t open_d, share_d;
6758         rfs4_deleg_state_t *dsp;
6759         sysid_t sysid;
6760         nfsstat4 status;
6761         caller_context_t ct;
6762         int fflags = 0;
6763         int recall = 0;
6764         int err;
6765         int first_open;
6766 
6767         /* get the file struct and hold a lock on it during initial open */
6768         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6769         if (fp == NULL) {
6770                 resp->status = NFS4ERR_RESOURCE;
6771                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6772                 return;
6773         }
6774 
6775         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6776         if (sp == NULL) {
6777                 resp->status = NFS4ERR_RESOURCE;
6778                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6779                 /* No need to keep any reference */
6780                 rw_exit(&fp->rf_file_rwlock);
6781                 rfs4_file_rele(fp);
6782                 return;
6783         }
6784 
6785         /* try to get the sysid before continuing */
6786         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6787                 resp->status = status;
6788                 rfs4_file_rele(fp);
6789                 /* Not a fully formed open; "close" it */
6790                 if (screate == TRUE)
6791                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6792                 rfs4_state_rele(sp);
6793                 return;
6794         }
6795 
6796         /* Calculate the fflags for this OPEN. */
6797         if (access & OPEN4_SHARE_ACCESS_READ)
6798                 fflags |= FREAD;
6799         if (access & OPEN4_SHARE_ACCESS_WRITE)
6800                 fflags |= FWRITE;
6801 
6802         rfs4_dbe_lock(sp->rs_dbe);
6803 
6804         /*
6805          * Calculate the new deny and access mode that this open is adding to
6806          * the file for this open owner;
6807          */
6808         open_d = (deny & ~sp->rs_open_deny);
6809         open_a = (access & ~sp->rs_open_access);
6810 
6811         /*
6812          * Calculate the new share access and share deny modes that this open
6813          * is adding to the file for this open owner;
6814          */
6815         share_a = (access & ~sp->rs_share_access);
6816         share_d = (deny & ~sp->rs_share_deny);
6817 
6818         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6819 
6820         /*
6821          * Check to see the client has already sent an open for this
6822          * open owner on this file with the same share/deny modes.
6823          * If so, we don't need to check for a conflict and we don't
6824          * need to add another shrlock.  If not, then we need to
6825          * check for conflicts in deny and access before checking for
6826          * conflicts in delegation.  We don't want to recall a
6827          * delegation based on an open that will eventually fail based
6828          * on shares modes.
6829          */
6830 
6831         if (share_a || share_d) {
6832                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6833                         rfs4_dbe_unlock(sp->rs_dbe);
6834                         resp->status = err;
6835 
6836                         rfs4_file_rele(fp);
6837                         /* Not a fully formed open; "close" it */
6838                         if (screate == TRUE)
6839                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6840                         rfs4_state_rele(sp);
6841                         return;
6842                 }
6843         }
6844 
6845         rfs4_dbe_lock(fp->rf_dbe);
6846 
6847         /*
6848          * Check to see if this file is delegated and if so, if a
6849          * recall needs to be done.
6850          */
6851         if (rfs4_check_recall(sp, access)) {
6852                 rfs4_dbe_unlock(fp->rf_dbe);
6853                 rfs4_dbe_unlock(sp->rs_dbe);
6854                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6855                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6856                 rfs4_dbe_lock(sp->rs_dbe);
6857 
6858                 /* if state closed while lock was dropped */
6859                 if (sp->rs_closed) {
6860                         if (share_a || share_d)
6861                                 (void) rfs4_unshare(sp);
6862                         rfs4_dbe_unlock(sp->rs_dbe);
6863                         rfs4_file_rele(fp);
6864                         /* Not a fully formed open; "close" it */
6865                         if (screate == TRUE)
6866                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6867                         rfs4_state_rele(sp);
6868                         resp->status = NFS4ERR_OLD_STATEID;
6869                         return;
6870                 }
6871 
6872                 rfs4_dbe_lock(fp->rf_dbe);
6873                 /* Let's see if the delegation was returned */
6874                 if (rfs4_check_recall(sp, access)) {
6875                         rfs4_dbe_unlock(fp->rf_dbe);
6876                         if (share_a || share_d)
6877                                 (void) rfs4_unshare(sp);
6878                         rfs4_dbe_unlock(sp->rs_dbe);
6879                         rfs4_file_rele(fp);
6880                         rfs4_update_lease(sp->rs_owner->ro_client);
6881 
6882                         /* Not a fully formed open; "close" it */
6883                         if (screate == TRUE)
6884                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6885                         rfs4_state_rele(sp);
6886                         resp->status = NFS4ERR_DELAY;
6887                         return;
6888                 }
6889         }
6890         /*
6891          * the share check passed and any delegation conflict has been
6892          * taken care of, now call vop_open.
6893          * if this is the first open then call vop_open with fflags.
6894          * if not, call vn_open_upgrade with just the upgrade flags.
6895          *
6896          * if the file has been opened already, it will have the current
6897          * access mode in the state struct.  if it has no share access, then
6898          * this is a new open.
6899          *
6900          * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6901          * call VOP_OPEN(), just do the open upgrade.
6902          */
6903         if (first_open && !deleg_cur) {
6904                 ct.cc_sysid = sysid;
6905                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6906                 ct.cc_caller_id = nfs4_srv_caller_id;
6907                 ct.cc_flags = CC_DONTBLOCK;
6908                 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6909                 if (err) {
6910                         rfs4_dbe_unlock(fp->rf_dbe);
6911                         if (share_a || share_d)
6912                                 (void) rfs4_unshare(sp);
6913                         rfs4_dbe_unlock(sp->rs_dbe);
6914                         rfs4_file_rele(fp);
6915 
6916                         /* Not a fully formed open; "close" it */
6917                         if (screate == TRUE)
6918                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6919                         rfs4_state_rele(sp);
6920                         /* check if a monitor detected a delegation conflict */
6921                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6922                                 resp->status = NFS4ERR_DELAY;
6923                         else
6924                                 resp->status = NFS4ERR_SERVERFAULT;
6925                         return;
6926                 }
6927         } else { /* open upgrade */
6928                 /*
6929                  * calculate the fflags for the new mode that is being added
6930                  * by this upgrade.
6931                  */
6932                 fflags = 0;
6933                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6934                         fflags |= FREAD;
6935                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6936                         fflags |= FWRITE;
6937                 vn_open_upgrade(cs->vp, fflags);
6938         }
6939         sp->rs_open_access |= access;
6940         sp->rs_open_deny |= deny;
6941 
6942         if (open_d & OPEN4_SHARE_DENY_READ)
6943                 fp->rf_deny_read++;
6944         if (open_d & OPEN4_SHARE_DENY_WRITE)
6945                 fp->rf_deny_write++;
6946         fp->rf_share_deny |= deny;
6947 
6948         if (open_a & OPEN4_SHARE_ACCESS_READ)
6949                 fp->rf_access_read++;
6950         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6951                 fp->rf_access_write++;
6952         fp->rf_share_access |= access;
6953 
6954         /*
6955          * Check for delegation here. if the deleg argument is not
6956          * DELEG_ANY, then this is a reclaim from a client and
6957          * we must honor the delegation requested. If necessary we can
6958          * set the recall flag.
6959          */
6960 
6961         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6962 
6963         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6964 
6965         next_stateid(&sp->rs_stateid);
6966 
6967         resp->stateid = sp->rs_stateid.stateid;
6968 
6969         rfs4_dbe_unlock(fp->rf_dbe);
6970         rfs4_dbe_unlock(sp->rs_dbe);
6971 
6972         if (dsp) {
6973                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6974                 rfs4_deleg_state_rele(dsp);
6975         }
6976 
6977         rfs4_file_rele(fp);
6978         rfs4_state_rele(sp);
6979 
6980         resp->status = NFS4_OK;
6981 }
6982 
6983 /*ARGSUSED*/
6984 static void
6985 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6986     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6987 {
6988         change_info4 *cinfo = &resp->cinfo;
6989         bitmap4 *attrset = &resp->attrset;
6990 
6991         if (args->opentype == OPEN4_NOCREATE)
6992                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6993                     req, cs, args->share_access, cinfo);
6994         else {
6995                 /* inhibit delegation grants during exclusive create */
6996 
6997                 if (args->mode == EXCLUSIVE4)
6998                         rfs4_disable_delegation();
6999 
7000                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7001                     oo->ro_client->rc_clientid);
7002         }
7003 
7004         if (resp->status == NFS4_OK) {
7005 
7006                 /* cs->vp cs->fh now reference the desired file */
7007 
7008                 rfs4_do_open(cs, req, oo,
7009                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7010                     args->share_access, args->share_deny, resp, 0);
7011 
7012                 /*
7013                  * If rfs4_createfile set attrset, we must
7014                  * clear this attrset before the response is copied.
7015                  */
7016                 if (resp->status != NFS4_OK && resp->attrset) {
7017                         resp->attrset = 0;
7018                 }
7019         }
7020         else
7021                 *cs->statusp = resp->status;
7022 
7023         if (args->mode == EXCLUSIVE4)
7024                 rfs4_enable_delegation();
7025 }
7026 
7027 /*ARGSUSED*/
7028 static void
7029 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7030     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7031 {
7032         change_info4 *cinfo = &resp->cinfo;
7033         vattr_t va;
7034         vtype_t v_type = cs->vp->v_type;
7035         int error = 0;
7036 
7037         /* Verify that we have a regular file */
7038         if (v_type != VREG) {
7039                 if (v_type == VDIR)
7040                         resp->status = NFS4ERR_ISDIR;
7041                 else if (v_type == VLNK)
7042                         resp->status = NFS4ERR_SYMLINK;
7043                 else
7044                         resp->status = NFS4ERR_INVAL;
7045                 return;
7046         }
7047 
7048         va.va_mask = AT_MODE|AT_UID;
7049         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7050         if (error) {
7051                 resp->status = puterrno4(error);
7052                 return;
7053         }
7054 
7055         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7056 
7057         /*
7058          * Check if we have access to the file, Note the the file
7059          * could have originally been open UNCHECKED or GUARDED
7060          * with mode bits that will now fail, but there is nothing
7061          * we can really do about that except in the case that the
7062          * owner of the file is the one requesting the open.
7063          */
7064         if (crgetuid(cs->cr) != va.va_uid) {
7065                 resp->status = check_open_access(args->share_access, cs, req);
7066                 if (resp->status != NFS4_OK) {
7067                         return;
7068                 }
7069         }
7070 
7071         /*
7072          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7073          */
7074         cinfo->before = 0;
7075         cinfo->after = 0;
7076         cinfo->atomic = FALSE;
7077 
7078         rfs4_do_open(cs, req, oo,
7079             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7080             args->share_access, args->share_deny, resp, 0);
7081 }
7082 
7083 static void
7084 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7085     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7086 {
7087         int error;
7088         nfsstat4 status;
7089         stateid4 stateid =
7090             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7091         rfs4_deleg_state_t *dsp;
7092 
7093         /*
7094          * Find the state info from the stateid and confirm that the
7095          * file is delegated.  If the state openowner is the same as
7096          * the supplied openowner we're done. If not, get the file
7097          * info from the found state info. Use that file info to
7098          * create the state for this lock owner. Note solaris doen't
7099          * really need the pathname to find the file. We may want to
7100          * lookup the pathname and make sure that the vp exist and
7101          * matches the vp in the file structure. However it is
7102          * possible that the pathname nolonger exists (local process
7103          * unlinks the file), so this may not be that useful.
7104          */
7105 
7106         status = rfs4_get_deleg_state(&stateid, &dsp);
7107         if (status != NFS4_OK) {
7108                 resp->status = status;
7109                 return;
7110         }
7111 
7112         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7113 
7114         /*
7115          * New lock owner, create state. Since this was probably called
7116          * in response to a CB_RECALL we set deleg to DELEG_NONE
7117          */
7118 
7119         ASSERT(cs->vp != NULL);
7120         VN_RELE(cs->vp);
7121         VN_HOLD(dsp->rds_finfo->rf_vp);
7122         cs->vp = dsp->rds_finfo->rf_vp;
7123 
7124         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7125                 rfs4_deleg_state_rele(dsp);
7126                 *cs->statusp = resp->status = puterrno4(error);
7127                 return;
7128         }
7129 
7130         /* Mark progress for delegation returns */
7131         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7132         rfs4_deleg_state_rele(dsp);
7133         rfs4_do_open(cs, req, oo, DELEG_NONE,
7134             args->share_access, args->share_deny, resp, 1);
7135 }
7136 
7137 /*ARGSUSED*/
7138 static void
7139 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7140     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7141 {
7142         /*
7143          * Lookup the pathname, it must already exist since this file
7144          * was delegated.
7145          *
7146          * Find the file and state info for this vp and open owner pair.
7147          *      check that they are in fact delegated.
7148          *      check that the state access and deny modes are the same.
7149          *
7150          * Return the delgation possibly seting the recall flag.
7151          */
7152         rfs4_file_t *fp;
7153         rfs4_state_t *sp;
7154         bool_t create = FALSE;
7155         bool_t dcreate = FALSE;
7156         rfs4_deleg_state_t *dsp;
7157         nfsace4 *ace;
7158 
7159         /* Note we ignore oflags */
7160         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7161             req, cs, args->share_access, &resp->cinfo);
7162 
7163         if (resp->status != NFS4_OK) {
7164                 return;
7165         }
7166 
7167         /* get the file struct and hold a lock on it during initial open */
7168         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7169         if (fp == NULL) {
7170                 resp->status = NFS4ERR_RESOURCE;
7171                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7172                 return;
7173         }
7174 
7175         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7176         if (sp == NULL) {
7177                 resp->status = NFS4ERR_SERVERFAULT;
7178                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7179                 rw_exit(&fp->rf_file_rwlock);
7180                 rfs4_file_rele(fp);
7181                 return;
7182         }
7183 
7184         rfs4_dbe_lock(sp->rs_dbe);
7185         rfs4_dbe_lock(fp->rf_dbe);
7186         if (args->share_access != sp->rs_share_access ||
7187             args->share_deny != sp->rs_share_deny ||
7188             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7189                 NFS4_DEBUG(rfs4_debug,
7190                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7191                 rfs4_dbe_unlock(fp->rf_dbe);
7192                 rfs4_dbe_unlock(sp->rs_dbe);
7193                 rfs4_file_rele(fp);
7194                 rfs4_state_rele(sp);
7195                 resp->status = NFS4ERR_SERVERFAULT;
7196                 return;
7197         }
7198         rfs4_dbe_unlock(fp->rf_dbe);
7199         rfs4_dbe_unlock(sp->rs_dbe);
7200 
7201         dsp = rfs4_finddeleg(sp, &dcreate);
7202         if (dsp == NULL) {
7203                 rfs4_state_rele(sp);
7204                 rfs4_file_rele(fp);
7205                 resp->status = NFS4ERR_SERVERFAULT;
7206                 return;
7207         }
7208 
7209         next_stateid(&sp->rs_stateid);
7210 
7211         resp->stateid = sp->rs_stateid.stateid;
7212 
7213         resp->delegation.delegation_type = dsp->rds_dtype;
7214 
7215         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7216                 open_read_delegation4 *rv =
7217                     &resp->delegation.open_delegation4_u.read;
7218 
7219                 rv->stateid = dsp->rds_delegid.stateid;
7220                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7221                 ace = &rv->permissions;
7222         } else {
7223                 open_write_delegation4 *rv =
7224                     &resp->delegation.open_delegation4_u.write;
7225 
7226                 rv->stateid = dsp->rds_delegid.stateid;
7227                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7228                 ace = &rv->permissions;
7229                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7230                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7231         }
7232 
7233         /* XXX For now */
7234         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7235         ace->flag = 0;
7236         ace->access_mask = 0;
7237         ace->who.utf8string_len = 0;
7238         ace->who.utf8string_val = 0;
7239 
7240         rfs4_deleg_state_rele(dsp);
7241         rfs4_state_rele(sp);
7242         rfs4_file_rele(fp);
7243 }
7244 
/*
 * Outcome of a sequence id check (see rfs4_check_seqid()).  The numeric
 * values are 0, 1 and 2, in declaration order.
 */
typedef enum {
        NFS4_CHKSEQ_OKAY = 0,   /* request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY,     /* same seqid and same operation as last */
        NFS4_CHKSEQ_BAD         /* any other seqid, or same seqid, other op */
} rfs4_chkseq_t;
7250 
7251 /*
7252  * Generic function for sequence number checks.
7253  */
7254 static rfs4_chkseq_t
7255 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7256     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7257 {
7258         /* Same sequence ids and matching operations? */
7259         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7260                 if (copyres == TRUE) {
7261                         rfs4_free_reply(resop);
7262                         rfs4_copy_reply(resop, lastop);
7263                 }
7264                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7265                     "Replayed SEQID %d\n", seqid));
7266                 return (NFS4_CHKSEQ_REPLAY);
7267         }
7268 
7269         /* If the incoming sequence is not the next expected then it is bad */
7270         if (rqst_seq != seqid + 1) {
7271                 if (rqst_seq == seqid) {
7272                         NFS4_DEBUG(rfs4_debug,
7273                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7274                             "but last op was %d current op is %d\n",
7275                             lastop->resop, resop->resop));
7276                         return (NFS4_CHKSEQ_BAD);
7277                 }
7278                 NFS4_DEBUG(rfs4_debug,
7279                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7280                     rqst_seq, seqid));
7281                 return (NFS4_CHKSEQ_BAD);
7282         }
7283 
7284         /* Everything okay -- next expected */
7285         return (NFS4_CHKSEQ_OKAY);
7286 }
7287 
7288 
7289 static rfs4_chkseq_t
7290 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7291 {
7292         rfs4_chkseq_t rc;
7293 
7294         rfs4_dbe_lock(op->ro_dbe);
7295         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7296             TRUE);
7297         rfs4_dbe_unlock(op->ro_dbe);
7298 
7299         if (rc == NFS4_CHKSEQ_OKAY)
7300                 rfs4_update_lease(op->ro_client);
7301 
7302         return (rc);
7303 }
7304 
7305 static rfs4_chkseq_t
7306 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7307 {
7308         rfs4_chkseq_t rc;
7309 
7310         rfs4_dbe_lock(op->ro_dbe);
7311         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7312             olo_seqid, resop, FALSE);
7313         rfs4_dbe_unlock(op->ro_dbe);
7314 
7315         return (rc);
7316 }
7317 
7318 static rfs4_chkseq_t
7319 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7320 {
7321         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7322 
7323         rfs4_dbe_lock(lsp->rls_dbe);
7324         if (!lsp->rls_skip_seqid_check)
7325                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7326                     resop, TRUE);
7327         rfs4_dbe_unlock(lsp->rls_dbe);
7328 
7329         return (rc);
7330 }
7331 
/*
 * rfs4_op_open: handler for the NFSv4 OPEN operation.
 *
 *      argop   - operation arguments (OPEN4args is taken from opopen)
 *      resop   - operation result (OPEN4res is filled in here)
 *      req     - RPC request, passed through to the per-claim helpers
 *      cs      - compound state; cs->vp must be set and *cs->statusp
 *                receives the final status
 *
 * Outline of the processing done below:
 *   1. Require a current filehandle (cs->vp).
 *   2. Look up the client by the clientid in the open owner and reject
 *      it if its lease has expired.
 *   3. Find (or create) the open owner and enter its ro_sw so the
 *      sequence space is held while the open is processed.
 *   4. For an already existing owner, validate the seqid; a replay is
 *      answered from the saved reply, and an owner still awaiting
 *      OPEN_CONFIRM has its opens freed and the lookup is retried.
 *   5. Apply the grace-period rules for the claim type and validate the
 *      share access/deny bits.
 *   6. Dispatch to the rfs4_do_open*() helper for the claim type.
 *   7. At "out", update the lease, saved reply and open seqid unless the
 *      status is one of the errors for which the seqid must not move.
 *
 * Exit labels:
 *   out    - releases the client hold, then does the seqid bookkeeping
 *   finish - publishes resp->status to *cs->statusp, leaves ro_sw and
 *            releases the open owner
 *   end    - fires the "done" DTrace probe only (nothing held)
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
        OPEN4args *args = &argop->nfs_argop4_u.opopen;
        OPEN4res *resp = &resop->nfs_resop4_u.opopen;
        open_owner4 *owner = &args->owner;
        open_claim_type4 claim = args->claim;
        rfs4_client_t *cp;
        rfs4_openowner_t *oo;
        bool_t create;                  /* in/out for rfs4_findopenowner() */
        bool_t replay = FALSE;          /* TRUE when seqid check says replay */
        int can_reclaim;

        DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
            OPEN4args *, args);

        if (cs->vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                goto end;
        }

        /*
         * Need to check clientid and lease expiration first based on
         * error ordering and incrementing sequence id.
         */
        cp = rfs4_findclient_by_id(owner->clientid, FALSE);
        if (cp == NULL) {
                *cs->statusp = resp->status =
                    rfs4_check_clientid(&owner->clientid, 0);
                goto end;
        }

        if (rfs4_lease_expired(cp)) {
                /*
                 * NOTE(review): no rfs4_client_rele() follows on this
                 * path; presumably rfs4_client_close() consumes the
                 * hold taken by the lookup above -- confirm.
                 */
                rfs4_client_close(cp);
                *cs->statusp = resp->status = NFS4ERR_EXPIRED;
                goto end;
        }
        /* Snapshot taken once, before the retry loop below. */
        can_reclaim = cp->rc_can_reclaim;

        /*
         * Find the open_owner for use from this point forward.  Take
         * care in updating the sequence id based on the type of error
         * being returned.
         */
retry:
        /*
         * create is reset on every pass; after the call it is read
         * below as "the open_owner was created on this pass".
         */
        create = TRUE;
        oo = rfs4_findopenowner(owner, &create, args->seqid);
        if (oo == NULL) {
                *cs->statusp = resp->status = NFS4ERR_RESOURCE;
                rfs4_client_rele(cp);
                goto end;
        }

        /* Hold off access to the sequence space while the open is done */
        rfs4_sw_enter(&oo->ro_sw);

        /*
         * If the open_owner existed before at the server, then check
         * the sequence id.
         */
        if (!create && !oo->ro_postpone_confirm) {
                switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
                case NFS4_CHKSEQ_BAD:
                        /*
                         * A seqid ahead of ours on a still unconfirmed
                         * owner: free the owner's opens, drop it and
                         * start over with a fresh lookup.
                         */
                        if ((args->seqid > oo->ro_open_seqid) &&
                            oo->ro_need_confirm) {
                                rfs4_free_opens(oo, TRUE, FALSE);
                                rfs4_sw_exit(&oo->ro_sw);
                                rfs4_openowner_rele(oo);
                                goto retry;
                        }
                        resp->status = NFS4ERR_BAD_SEQID;
                        goto out;
                case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
                        replay = TRUE;
                        goto out;
                default:
                        break;
                }

                /*
                 * Sequence was ok and open owner exists
                 * check to see if we have yet to see an
                 * open_confirm.
                 */
                if (oo->ro_need_confirm) {
                        rfs4_free_opens(oo, TRUE, FALSE);
                        rfs4_sw_exit(&oo->ro_sw);
                        rfs4_openowner_rele(oo);
                        goto retry;
                }
        }
        /* Grace only applies to regular-type OPENs */
        if (rfs4_clnt_in_grace(cp) &&
            (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
                *cs->statusp = resp->status = NFS4ERR_GRACE;
                goto out;
        }

        /*
         * If previous state at the server existed then can_reclaim
         * will be set. If not reply NFS4ERR_NO_GRACE to the
         * client.
         */
        if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }


        /*
         * Reject the open if the client has missed the grace period
         */
        if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }

        /* Couple of up-front bookkeeping items */
        if (oo->ro_need_confirm) {
                /*
                 * If this is a reclaim OPEN then we should not ask
                 * for a confirmation of the open_owner per the
                 * protocol specification.
                 */
                if (claim == CLAIM_PREVIOUS)
                        oo->ro_need_confirm = FALSE;
                else
                        resp->rflags |= OPEN4_RESULT_CONFIRM;
        }
        resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

        /*
         * If there is an unshared filesystem mounted on this vnode,
         * do not allow to open/create in this directory.
         */
        if (vn_ismntpt(cs->vp)) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        /*
         * access must be READ, WRITE, or BOTH.  No access is invalid.
         * deny can be READ, WRITE, BOTH, or NONE.
         * bits not defined for access/deny are invalid.
         */
        if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
                *cs->statusp = resp->status = NFS4ERR_INVAL;
                goto out;
        }


        /*
         * make sure attrset is zero before response is built.
         */
        resp->attrset = 0;

        /* Each helper records its outcome in resp->status. */
        switch (claim) {
        case CLAIM_NULL:
                rfs4_do_opennull(cs, req, args, oo, resp);
                break;
        case CLAIM_PREVIOUS:
                rfs4_do_openprev(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_CUR:
                rfs4_do_opendelcur(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_PREV:
                rfs4_do_opendelprev(cs, req, args, oo, resp);
                break;
        default:
                resp->status = NFS4ERR_INVAL;
                break;
        }

out:
        /*
         * The client hold from rfs4_findclient_by_id() is dropped here;
         * the code below reaches the client through oo->ro_client.
         */
        rfs4_client_rele(cp);

        /* Catch sequence id handling here to make it a little easier */
        switch (resp->status) {
        case NFS4ERR_BADXDR:
        case NFS4ERR_BAD_SEQID:
        case NFS4ERR_BAD_STATEID:
        case NFS4ERR_NOFILEHANDLE:
        case NFS4ERR_RESOURCE:
        case NFS4ERR_STALE_CLIENTID:
        case NFS4ERR_STALE_STATEID:
                /*
                 * The protocol states that if any of these errors are
                 * being returned, the sequence id should not be
                 * incremented.  Any other return requires an
                 * increment.
                 */
                break;
        default:
                /* Always update the lease in this case */
                rfs4_update_lease(oo->ro_client);

                /* Regular response - copy the result */
                if (!replay)
                        rfs4_update_open_resp(oo, resop, &cs->fh);

                /*
                 * REPLAY case: Only if the previous response was OK
                 * do we copy the filehandle.  If not OK, no
                 * filehandle to copy.
                 */
                if (replay == TRUE &&
                    resp->status == NFS4_OK &&
                    oo->ro_reply_fh.nfs_fh4_val) {
                        /*
                         * If this is a replay, we must restore the
                         * current filehandle/vp to that of what was
                         * returned originally.  Try our best to do
                         * it.
                         */
                        nfs_fh4_fmt_t *fh_fmtp =
                            (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

                        cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
                            (fid_t *)&fh_fmtp->fh4_xlen, NULL);

                        if (cs->exi == NULL) {
                                resp->status = NFS4ERR_STALE;
                                goto finish;
                        }

                        /* Swap the current vnode for the replayed one. */
                        VN_RELE(cs->vp);

                        cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
                            &resp->status);

                        /* nfs4_fhtovp() was handed &resp->status above. */
                        if (cs->vp == NULL)
                                goto finish;

                        nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
                }

                /*
                 * If this was a replay, no need to update the
                 * sequence id. If the open_owner was not created on
                 * this pass, then update.  The first use of an
                 * open_owner will not bump the sequence id.
                 */
                if (replay == FALSE && !create)
                        rfs4_update_open_sequence(oo);
                /*
                 * If the client is receiving an error and the
                 * open_owner needs to be confirmed, there is no way
                 * to notify the client of this fact ignoring the fact
                 * that the server has no method of returning a
                 * stateid to confirm.  Therefore, the server needs to
                 * mark this open_owner in a way as to avoid the
                 * sequence id checking the next time the client uses
                 * this open_owner.
                 */
                if (resp->status != NFS4_OK && oo->ro_need_confirm)
                        oo->ro_postpone_confirm = TRUE;
                /*
                 * If OK response then clear the postpone flag and
                 * reset the sequence id to keep in sync with the
                 * client.
                 */
                if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
                        oo->ro_postpone_confirm = FALSE;
                        oo->ro_open_seqid = args->seqid;
                }
                break;
        }

finish:
        /* Several paths above set only resp->status; publish it here. */
        *cs->statusp = resp->status;

        rfs4_sw_exit(&oo->ro_sw);
        rfs4_openowner_rele(oo);

end:
        DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
            OPEN4res *, resp);
}
7614 
7615 /*ARGSUSED*/
7616 void
7617 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7618     struct svc_req *req, struct compound_state *cs)
7619 {
7620         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7621         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7622         rfs4_state_t *sp;
7623         nfsstat4 status;
7624 
7625         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7626             OPEN_CONFIRM4args *, args);
7627 
7628         if (cs->vp == NULL) {
7629                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7630                 goto out;
7631         }
7632 
7633         if (cs->vp->v_type != VREG) {
7634                 *cs->statusp = resp->status =
7635                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7636                 return;
7637         }
7638 
7639         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7640         if (status != NFS4_OK) {
7641                 *cs->statusp = resp->status = status;
7642                 goto out;
7643         }
7644 
7645         /* Ensure specified filehandle matches */
7646         if (cs->vp != sp->rs_finfo->rf_vp) {
7647                 rfs4_state_rele(sp);
7648                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7649                 goto out;
7650         }
7651 
7652         /* hold off other access to open_owner while we tinker */
7653         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7654 
7655         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7656         case NFS4_CHECK_STATEID_OKAY:
7657                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7658                     resop) != 0) {
7659                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7660                         break;
7661                 }
7662                 /*
7663                  * If it is the appropriate stateid and determined to
7664                  * be "OKAY" then this means that the stateid does not
7665                  * need to be confirmed and the client is in error for
7666                  * sending an OPEN_CONFIRM.
7667                  */
7668                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7669                 break;
7670         case NFS4_CHECK_STATEID_OLD:
7671                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7672                 break;
7673         case NFS4_CHECK_STATEID_BAD:
7674                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7675                 break;
7676         case NFS4_CHECK_STATEID_EXPIRED:
7677                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7678                 break;
7679         case NFS4_CHECK_STATEID_CLOSED:
7680                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7681                 break;
7682         case NFS4_CHECK_STATEID_REPLAY:
7683                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7684                     resop)) {
7685                 case NFS4_CHKSEQ_OKAY:
7686                         /*
7687                          * This is replayed stateid; if seqid matches
7688                          * next expected, then client is using wrong seqid.
7689                          */
7690                         /* fall through */
7691                 case NFS4_CHKSEQ_BAD:
7692                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7693                         break;
7694                 case NFS4_CHKSEQ_REPLAY:
7695                         /*
7696                          * Note this case is the duplicate case so
7697                          * resp->status is already set.
7698                          */
7699                         *cs->statusp = resp->status;
7700                         rfs4_update_lease(sp->rs_owner->ro_client);
7701                         break;
7702                 }
7703                 break;
7704         case NFS4_CHECK_STATEID_UNCONFIRMED:
7705                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7706                     resop) != NFS4_CHKSEQ_OKAY) {
7707                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7708                         break;
7709                 }
7710                 *cs->statusp = resp->status = NFS4_OK;
7711 
7712                 next_stateid(&sp->rs_stateid);
7713                 resp->open_stateid = sp->rs_stateid.stateid;
7714                 sp->rs_owner->ro_need_confirm = FALSE;
7715                 rfs4_update_lease(sp->rs_owner->ro_client);
7716                 rfs4_update_open_sequence(sp->rs_owner);
7717                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7718                 break;
7719         default:
7720                 ASSERT(FALSE);
7721                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7722                 break;
7723         }
7724         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7725         rfs4_state_rele(sp);
7726 
7727 out:
7728         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7729             OPEN_CONFIRM4res *, resp);
7730 }
7731 
7732 /*ARGSUSED*/
7733 void
7734 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7735     struct svc_req *req, struct compound_state *cs)
7736 {
7737         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7738         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7739         uint32_t access = args->share_access;
7740         uint32_t deny = args->share_deny;
7741         nfsstat4 status;
7742         rfs4_state_t *sp;
7743         rfs4_file_t *fp;
7744         int fflags = 0;
7745 
7746         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7747             OPEN_DOWNGRADE4args *, args);
7748 
7749         if (cs->vp == NULL) {
7750                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7751                 goto out;
7752         }
7753 
7754         if (cs->vp->v_type != VREG) {
7755                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7756                 return;
7757         }
7758 
7759         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7760         if (status != NFS4_OK) {
7761                 *cs->statusp = resp->status = status;
7762                 goto out;
7763         }
7764 
7765         /* Ensure specified filehandle matches */
7766         if (cs->vp != sp->rs_finfo->rf_vp) {
7767                 rfs4_state_rele(sp);
7768                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7769                 goto out;
7770         }
7771 
7772         /* hold off other access to open_owner while we tinker */
7773         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7774 
7775         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7776         case NFS4_CHECK_STATEID_OKAY:
7777                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7778                     resop) != NFS4_CHKSEQ_OKAY) {
7779                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7780                         goto end;
7781                 }
7782                 break;
7783         case NFS4_CHECK_STATEID_OLD:
7784                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7785                 goto end;
7786         case NFS4_CHECK_STATEID_BAD:
7787                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7788                 goto end;
7789         case NFS4_CHECK_STATEID_EXPIRED:
7790                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7791                 goto end;
7792         case NFS4_CHECK_STATEID_CLOSED:
7793                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7794                 goto end;
7795         case NFS4_CHECK_STATEID_UNCONFIRMED:
7796                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7797                 goto end;
7798         case NFS4_CHECK_STATEID_REPLAY:
7799                 /* Check the sequence id for the open owner */
7800                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7801                     resop)) {
7802                 case NFS4_CHKSEQ_OKAY:
7803                         /*
7804                          * This is replayed stateid; if seqid matches
7805                          * next expected, then client is using wrong seqid.
7806                          */
7807                         /* fall through */
7808                 case NFS4_CHKSEQ_BAD:
7809                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7810                         goto end;
7811                 case NFS4_CHKSEQ_REPLAY:
7812                         /*
7813                          * Note this case is the duplicate case so
7814                          * resp->status is already set.
7815                          */
7816                         *cs->statusp = resp->status;
7817                         rfs4_update_lease(sp->rs_owner->ro_client);
7818                         goto end;
7819                 }
7820                 break;
7821         default:
7822                 ASSERT(FALSE);
7823                 break;
7824         }
7825 
7826         rfs4_dbe_lock(sp->rs_dbe);
7827         /*
7828          * Check that the new access modes and deny modes are valid.
7829          * Check that no invalid bits are set.
7830          */
7831         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7832             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7833                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7834                 rfs4_update_open_sequence(sp->rs_owner);
7835                 rfs4_dbe_unlock(sp->rs_dbe);
7836                 goto end;
7837         }
7838 
7839         /*
7840          * The new modes must be a subset of the current modes and
7841          * the access must specify at least one mode. To test that
7842          * the new mode is a subset of the current modes we bitwise
7843          * AND them together and check that the result equals the new
7844          * mode. For example:
7845          * New mode, access == R and current mode, sp->rs_open_access  == RW
7846          * access & sp->rs_open_access == R == access, so the new access mode
7847          * is valid. Consider access == RW, sp->rs_open_access = R
7848          * access & sp->rs_open_access == R != access, so the new access mode
7849          * is invalid.
7850          */
7851         if ((access & sp->rs_open_access) != access ||
7852             (deny & sp->rs_open_deny) != deny ||
7853             (access &
7854             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7855                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7856                 rfs4_update_open_sequence(sp->rs_owner);
7857                 rfs4_dbe_unlock(sp->rs_dbe);
7858                 goto end;
7859         }
7860 
7861         /*
7862          * Release any share locks associated with this stateID.
7863          * Strictly speaking, this violates the spec because the
7864          * spec effectively requires that open downgrade be atomic.
7865          * At present, fs_shrlock does not have this capability.
7866          */
7867         (void) rfs4_unshare(sp);
7868 
7869         status = rfs4_share(sp, access, deny);
7870         if (status != NFS4_OK) {
7871                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7872                 rfs4_update_open_sequence(sp->rs_owner);
7873                 rfs4_dbe_unlock(sp->rs_dbe);
7874                 goto end;
7875         }
7876 
7877         fp = sp->rs_finfo;
7878         rfs4_dbe_lock(fp->rf_dbe);
7879 
7880         /*
7881          * If the current mode has deny read and the new mode
7882          * does not, decrement the number of deny read mode bits
7883          * and if it goes to zero turn off the deny read bit
7884          * on the file.
7885          */
7886         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7887             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7888                 fp->rf_deny_read--;
7889                 if (fp->rf_deny_read == 0)
7890                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7891         }
7892 
7893         /*
7894          * If the current mode has deny write and the new mode
7895          * does not, decrement the number of deny write mode bits
7896          * and if it goes to zero turn off the deny write bit
7897          * on the file.
7898          */
7899         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7900             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7901                 fp->rf_deny_write--;
7902                 if (fp->rf_deny_write == 0)
7903                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7904         }
7905 
7906         /*
7907          * If the current mode has access read and the new mode
7908          * does not, decrement the number of access read mode bits
7909          * and if it goes to zero turn off the access read bit
7910          * on the file.  set fflags to FREAD for the call to
7911          * vn_open_downgrade().
7912          */
7913         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7914             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7915                 fp->rf_access_read--;
7916                 if (fp->rf_access_read == 0)
7917                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7918                 fflags |= FREAD;
7919         }
7920 
7921         /*
7922          * If the current mode has access write and the new mode
7923          * does not, decrement the number of access write mode bits
7924          * and if it goes to zero turn off the access write bit
7925          * on the file.  set fflags to FWRITE for the call to
7926          * vn_open_downgrade().
7927          */
7928         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7929             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7930                 fp->rf_access_write--;
7931                 if (fp->rf_access_write == 0)
7932                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7933                 fflags |= FWRITE;
7934         }
7935 
7936         /* Check that the file is still accessible */
7937         ASSERT(fp->rf_share_access);
7938 
7939         rfs4_dbe_unlock(fp->rf_dbe);
7940 
7941         /* now set the new open access and deny modes */
7942         sp->rs_open_access = access;
7943         sp->rs_open_deny = deny;
7944 
7945         /*
7946          * we successfully downgraded the share lock, now we need to downgrade
7947          * the open. it is possible that the downgrade was only for a deny
7948          * mode and we have nothing else to do.
7949          */
7950         if ((fflags & (FREAD|FWRITE)) != 0)
7951                 vn_open_downgrade(cs->vp, fflags);
7952 
7953         /* Update the stateid */
7954         next_stateid(&sp->rs_stateid);
7955         resp->open_stateid = sp->rs_stateid.stateid;
7956 
7957         rfs4_dbe_unlock(sp->rs_dbe);
7958 
7959         *cs->statusp = resp->status = NFS4_OK;
7960         /* Update the lease */
7961         rfs4_update_lease(sp->rs_owner->ro_client);
7962         /* And the sequence */
7963         rfs4_update_open_sequence(sp->rs_owner);
7964         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7965 
7966 end:
7967         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7968         rfs4_state_rele(sp);
7969 out:
7970         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7971             OPEN_DOWNGRADE4res *, resp);
7972 }
7973 
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * buffer s1.  The buffer need not be NUL-terminated.
 *
 * Returns a pointer to the first occurrence, or NULL when s2 does not
 * fit in, or does not occur within, those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t patlen = strlen(s2);
        char *cur = (char *)s1;
        size_t left;

        /* Stop as soon as the remaining bytes cannot hold the pattern. */
        for (left = n; left >= patlen; left--, cur++) {
                if (bcmp(cur, s2, patlen) == 0)
                        return (cur);
        }

        return (NULL);
}
7989 
7990 /*
7991  * The logic behind this function is detailed in the NFSv4 RFC in the
7992  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7993  * that section for explicit guidance to server behavior for
7994  * SETCLIENTID.
7995  */
7996 void
7997 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7998     struct svc_req *req, struct compound_state *cs)
7999 {
8000         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8001         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8002         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8003         rfs4_clntip_t *ci;
8004         bool_t create;
8005         char *addr, *netid;
8006         int len;
8007 
8008         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8009             SETCLIENTID4args *, args);
8010 retry:
8011         newcp = cp_confirmed = cp_unconfirmed = NULL;
8012 
8013         /*
8014          * Save the caller's IP address
8015          */
8016         args->client.cl_addr =
8017             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8018 
8019         /*
8020          * Record if it is a Solaris client that cannot handle referrals.
8021          */
8022         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8023             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8024                 /* Add a "yes, it's downrev" record */
8025                 create = TRUE;
8026                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8027                 ASSERT(ci != NULL);
8028                 rfs4_dbe_rele(ci->ri_dbe);
8029         } else {
8030                 /* Remove any previous record */
8031                 rfs4_invalidate_clntip(args->client.cl_addr);
8032         }
8033 
8034         /*
8035          * In search of an EXISTING client matching the incoming
8036          * request to establish a new client identifier at the server
8037          */
8038         create = TRUE;
8039         cp = rfs4_findclient(&args->client, &create, NULL);
8040 
8041         /* Should never happen */
8042         ASSERT(cp != NULL);
8043 
8044         if (cp == NULL) {
8045                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8046                 goto out;
8047         }
8048 
8049         /*
8050          * Easiest case. Client identifier is newly created and is
8051          * unconfirmed.  Also note that for this case, no other
8052          * entries exist for the client identifier.  Nothing else to
8053          * check.  Just setup the response and respond.
8054          */
8055         if (create) {
8056                 *cs->statusp = res->status = NFS4_OK;
8057                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8058                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8059                     cp->rc_confirm_verf;
8060                 /* Setup callback information; CB_NULL confirmation later */
8061                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8062 
8063                 rfs4_client_rele(cp);
8064                 goto out;
8065         }
8066 
8067         /*
8068          * An existing, confirmed client may exist but it may not have
8069          * been active for at least one lease period.  If so, then
8070          * "close" the client and create a new client identifier
8071          */
8072         if (rfs4_lease_expired(cp)) {
8073                 rfs4_client_close(cp);
8074                 goto retry;
8075         }
8076 
8077         if (cp->rc_need_confirm == TRUE)
8078                 cp_unconfirmed = cp;
8079         else
8080                 cp_confirmed = cp;
8081 
8082         cp = NULL;
8083 
        /*
         * We have a confirmed client, now check for an
         * unconfirmed entry.
         */
8088         if (cp_confirmed) {
8089                 /* If creds don't match then client identifier is inuse */
8090                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8091                         rfs4_cbinfo_t *cbp;
                        /*
                         * Someone else has established this client
                         * id. Try to say who they are. We will use
                         * the callback address supplied by the
                         * first client.
                         */
8098                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8099 
8100                         addr = netid = NULL;
8101 
8102                         cbp = &cp_confirmed->rc_cbinfo;
8103                         if (cbp->cb_callback.cb_location.r_addr &&
8104                             cbp->cb_callback.cb_location.r_netid) {
8105                                 cb_client4 *cbcp = &cbp->cb_callback;
8106 
8107                                 len = strlen(cbcp->cb_location.r_addr)+1;
8108                                 addr = kmem_alloc(len, KM_SLEEP);
8109                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8110                                 len = strlen(cbcp->cb_location.r_netid)+1;
8111                                 netid = kmem_alloc(len, KM_SLEEP);
8112                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8113                         }
8114 
8115                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8116                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8117 
8118                         rfs4_client_rele(cp_confirmed);
8119                 }
8120 
8121                 /*
8122                  * Confirmed, creds match, and verifier matches; must
8123                  * be an update of the callback info
8124                  */
8125                 if (cp_confirmed->rc_nfs_client.verifier ==
8126                     args->client.verifier) {
8127                         /* Setup callback information */
8128                         rfs4_client_setcb(cp_confirmed, &args->callback,
8129                             args->callback_ident);
8130 
8131                         /* everything okay -- move ahead */
8132                         *cs->statusp = res->status = NFS4_OK;
8133                         res->SETCLIENTID4res_u.resok4.clientid =
8134                             cp_confirmed->rc_clientid;
8135 
8136                         /* update the confirm_verifier and return it */
8137                         rfs4_client_scv_next(cp_confirmed);
8138                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8139                             cp_confirmed->rc_confirm_verf;
8140 
8141                         rfs4_client_rele(cp_confirmed);
8142                         goto out;
8143                 }
8144 
8145                 /*
8146                  * Creds match but the verifier doesn't.  Must search
8147                  * for an unconfirmed client that would be replaced by
8148                  * this request.
8149                  */
8150                 create = FALSE;
8151                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8152                     cp_confirmed);
8153         }
8154 
8155         /*
8156          * At this point, we have taken care of the brand new client
8157          * struct, INUSE case, update of an existing, and confirmed
8158          * client struct.
8159          */
8160 
8161         /*
8162          * check to see if things have changed while we originally
8163          * picked up the client struct.  If they have, then return and
8164          * retry the processing of this SETCLIENTID request.
8165          */
8166         if (cp_unconfirmed) {
8167                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8168                 if (!cp_unconfirmed->rc_need_confirm) {
8169                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8170                         rfs4_client_rele(cp_unconfirmed);
8171                         if (cp_confirmed)
8172                                 rfs4_client_rele(cp_confirmed);
8173                         goto retry;
8174                 }
8175                 /* do away with the old unconfirmed one */
8176                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8177                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8178                 rfs4_client_rele(cp_unconfirmed);
8179                 cp_unconfirmed = NULL;
8180         }
8181 
8182         /*
8183          * This search will temporarily hide the confirmed client
8184          * struct while a new client struct is created as the
8185          * unconfirmed one.
8186          */
8187         create = TRUE;
8188         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8189 
8190         ASSERT(newcp != NULL);
8191 
8192         if (newcp == NULL) {
8193                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8194                 rfs4_client_rele(cp_confirmed);
8195                 goto out;
8196         }
8197 
8198         /*
8199          * If one was not created, then a similar request must be in
8200          * process so release and start over with this one
8201          */
8202         if (create != TRUE) {
8203                 rfs4_client_rele(newcp);
8204                 if (cp_confirmed)
8205                         rfs4_client_rele(cp_confirmed);
8206                 goto retry;
8207         }
8208 
8209         *cs->statusp = res->status = NFS4_OK;
8210         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8211         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8212             newcp->rc_confirm_verf;
8213         /* Setup callback information; CB_NULL confirmation later */
8214         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8215 
8216         newcp->rc_cp_confirmed = cp_confirmed;
8217 
8218         rfs4_client_rele(newcp);
8219 
8220 out:
8221         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8222             SETCLIENTID4res *, res);
8223 }
8224 
8225 /*ARGSUSED*/
8226 void
8227 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8228     struct svc_req *req, struct compound_state *cs)
8229 {
8230         SETCLIENTID_CONFIRM4args *args =
8231             &argop->nfs_argop4_u.opsetclientid_confirm;
8232         SETCLIENTID_CONFIRM4res *res =
8233             &resop->nfs_resop4_u.opsetclientid_confirm;
8234         rfs4_client_t *cp, *cptoclose = NULL;
8235         nfs4_srv_t *nsrv4;
8236 
8237         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8238             struct compound_state *, cs,
8239             SETCLIENTID_CONFIRM4args *, args);
8240 
8241         nsrv4 = nfs4_get_srv();
8242         *cs->statusp = res->status = NFS4_OK;
8243 
8244         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8245 
8246         if (cp == NULL) {
8247                 *cs->statusp = res->status =
8248                     rfs4_check_clientid(&args->clientid, 1);
8249                 goto out;
8250         }
8251 
8252         if (!creds_ok(cp, req, cs)) {
8253                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8254                 rfs4_client_rele(cp);
8255                 goto out;
8256         }
8257 
8258         /* If the verifier doesn't match, the record doesn't match */
8259         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8260                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8261                 rfs4_client_rele(cp);
8262                 goto out;
8263         }
8264 
8265         rfs4_dbe_lock(cp->rc_dbe);
8266         cp->rc_need_confirm = FALSE;
8267         if (cp->rc_cp_confirmed) {
8268                 cptoclose = cp->rc_cp_confirmed;
8269                 cptoclose->rc_ss_remove = 1;
8270                 cp->rc_cp_confirmed = NULL;
8271         }
8272 
8273         /*
8274          * Update the client's associated server instance, if it's changed
8275          * since the client was created.
8276          */
8277         if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8278                 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8279 
8280         /*
8281          * Record clientid in stable storage.
8282          * Must be done after server instance has been assigned.
8283          */
8284         rfs4_ss_clid(nsrv4, cp);
8285 
8286         rfs4_dbe_unlock(cp->rc_dbe);
8287 
8288         if (cptoclose)
8289                 /* don't need to rele, client_close does it */
8290                 rfs4_client_close(cptoclose);
8291 
8292         /* If needed, initiate CB_NULL call for callback path */
8293         rfs4_deleg_cb_check(cp);
8294         rfs4_update_lease(cp);
8295 
8296         /*
8297          * Check to see if client can perform reclaims
8298          */
8299         rfs4_ss_chkclid(nsrv4, cp);
8300 
8301         rfs4_client_rele(cp);
8302 
8303 out:
8304         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8305             struct compound_state *, cs,
8306             SETCLIENTID_CONFIRM4 *, res);
8307 }
8308 
8309 
8310 /*ARGSUSED*/
8311 void
8312 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8313     struct svc_req *req, struct compound_state *cs)
8314 {
8315         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8316         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8317         rfs4_state_t *sp;
8318         nfsstat4 status;
8319 
8320         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8321             CLOSE4args *, args);
8322 
8323         if (cs->vp == NULL) {
8324                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8325                 goto out;
8326         }
8327 
8328         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8329         if (status != NFS4_OK) {
8330                 *cs->statusp = resp->status = status;
8331                 goto out;
8332         }
8333 
8334         /* Ensure specified filehandle matches */
8335         if (cs->vp != sp->rs_finfo->rf_vp) {
8336                 rfs4_state_rele(sp);
8337                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8338                 goto out;
8339         }
8340 
8341         /* hold off other access to open_owner while we tinker */
8342         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8343 
8344         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8345         case NFS4_CHECK_STATEID_OKAY:
8346                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8347                     resop) != NFS4_CHKSEQ_OKAY) {
8348                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8349                         goto end;
8350                 }
8351                 break;
8352         case NFS4_CHECK_STATEID_OLD:
8353                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8354                 goto end;
8355         case NFS4_CHECK_STATEID_BAD:
8356                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8357                 goto end;
8358         case NFS4_CHECK_STATEID_EXPIRED:
8359                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8360                 goto end;
8361         case NFS4_CHECK_STATEID_CLOSED:
8362                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8363                 goto end;
8364         case NFS4_CHECK_STATEID_UNCONFIRMED:
8365                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8366                 goto end;
8367         case NFS4_CHECK_STATEID_REPLAY:
8368                 /* Check the sequence id for the open owner */
8369                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8370                     resop)) {
8371                 case NFS4_CHKSEQ_OKAY:
8372                         /*
8373                          * This is replayed stateid; if seqid matches
8374                          * next expected, then client is using wrong seqid.
8375                          */
8376                         /* FALL THROUGH */
8377                 case NFS4_CHKSEQ_BAD:
8378                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8379                         goto end;
8380                 case NFS4_CHKSEQ_REPLAY:
8381                         /*
8382                          * Note this case is the duplicate case so
8383                          * resp->status is already set.
8384                          */
8385                         *cs->statusp = resp->status;
8386                         rfs4_update_lease(sp->rs_owner->ro_client);
8387                         goto end;
8388                 }
8389                 break;
8390         default:
8391                 ASSERT(FALSE);
8392                 break;
8393         }
8394 
8395         rfs4_dbe_lock(sp->rs_dbe);
8396 
8397         /* Update the stateid. */
8398         next_stateid(&sp->rs_stateid);
8399         resp->open_stateid = sp->rs_stateid.stateid;
8400 
8401         rfs4_dbe_unlock(sp->rs_dbe);
8402 
8403         rfs4_update_lease(sp->rs_owner->ro_client);
8404         rfs4_update_open_sequence(sp->rs_owner);
8405         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8406 
8407         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8408 
8409         *cs->statusp = resp->status = status;
8410 
8411 end:
8412         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8413         rfs4_state_rele(sp);
8414 out:
8415         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8416             CLOSE4res *, resp);
8417 }
8418 
8419 /*
8420  * Manage the counts on the file struct and close all file locks
8421  */
8422 /*ARGSUSED*/
8423 void
8424 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8425     bool_t close_of_client)
8426 {
8427         rfs4_file_t *fp = sp->rs_finfo;
8428         rfs4_lo_state_t *lsp;
8429         int fflags = 0;
8430 
8431         /*
8432          * If this call is part of the larger closing down of client
8433          * state then it is just easier to release all locks
8434          * associated with this client instead of going through each
8435          * individual file and cleaning locks there.
8436          */
8437         if (close_of_client) {
8438                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8439                     !list_is_empty(&sp->rs_lostatelist) &&
8440                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8441                         /* Is the PxFS kernel module loaded? */
8442                         if (lm_remove_file_locks != NULL) {
8443                                 int new_sysid;
8444 
8445                                 /* Encode the cluster nodeid in new sysid */
8446                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8447                                 lm_set_nlmid_flk(&new_sysid);
8448 
8449                                 /*
8450                                  * This PxFS routine removes file locks for a
8451                                  * client over all nodes of a cluster.
8452                                  */
8453                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8454                                     "lm_remove_file_locks(sysid=0x%x)\n",
8455                                     new_sysid));
8456                                 (*lm_remove_file_locks)(new_sysid);
8457                         } else {
8458                                 struct flock64 flk;
8459 
8460                                 /* Release all locks for this client */
8461                                 flk.l_type = F_UNLKSYS;
8462                                 flk.l_whence = 0;
8463                                 flk.l_start = 0;
8464                                 flk.l_len = 0;
8465                                 flk.l_sysid =
8466                                     sp->rs_owner->ro_client->rc_sysidt;
8467                                 flk.l_pid = 0;
8468                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8469                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8470                                     (u_offset_t)0, NULL, CRED(), NULL);
8471                         }
8472 
8473                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8474                 }
8475         }
8476 
8477         /*
8478          * Release all locks on this file by this lock owner or at
8479          * least mark the locks as having been released
8480          */
8481         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8482             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8483                 lsp->rls_locks_cleaned = TRUE;
8484 
8485                 /* Was this already taken care of above? */
8486                 if (!close_of_client &&
8487                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8488                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8489                             lsp->rls_locker->rl_pid,
8490                             lsp->rls_locker->rl_client->rc_sysidt);
8491         }
8492 
8493         /*
8494          * Release any shrlocks associated with this open state ID.
8495          * This must be done before the rfs4_state gets marked closed.
8496          */
8497         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8498                 (void) rfs4_unshare(sp);
8499 
8500         if (sp->rs_open_access) {
8501                 rfs4_dbe_lock(fp->rf_dbe);
8502 
8503                 /*
8504                  * Decrement the count for each access and deny bit that this
8505                  * state has contributed to the file.
8506                  * If the file counts go to zero
8507                  * clear the appropriate bit in the appropriate mask.
8508                  */
8509                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8510                         fp->rf_access_read--;
8511                         fflags |= FREAD;
8512                         if (fp->rf_access_read == 0)
8513                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8514                 }
8515                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8516                         fp->rf_access_write--;
8517                         fflags |= FWRITE;
8518                         if (fp->rf_access_write == 0)
8519                                 fp->rf_share_access &=
8520                                     ~OPEN4_SHARE_ACCESS_WRITE;
8521                 }
8522                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8523                         fp->rf_deny_read--;
8524                         if (fp->rf_deny_read == 0)
8525                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8526                 }
8527                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8528                         fp->rf_deny_write--;
8529                         if (fp->rf_deny_write == 0)
8530                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8531                 }
8532 
8533                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8534 
8535                 rfs4_dbe_unlock(fp->rf_dbe);
8536 
8537                 sp->rs_open_access = 0;
8538                 sp->rs_open_deny = 0;
8539         }
8540 }
8541 
8542 /*
8543  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8544  */
8545 static nfsstat4
8546 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8547 {
8548         rfs4_lockowner_t *lo;
8549         rfs4_client_t *cp;
8550         uint32_t len;
8551 
8552         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8553         if (lo != NULL) {
8554                 cp = lo->rl_client;
8555                 if (rfs4_lease_expired(cp)) {
8556                         rfs4_lockowner_rele(lo);
8557                         rfs4_dbe_hold(cp->rc_dbe);
8558                         rfs4_client_close(cp);
8559                         return (NFS4ERR_EXPIRED);
8560                 }
8561                 dp->owner.clientid = lo->rl_owner.clientid;
8562                 len = lo->rl_owner.owner_len;
8563                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8564                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8565                 dp->owner.owner_len = len;
8566                 rfs4_lockowner_rele(lo);
8567                 goto finish;
8568         }
8569 
8570         /*
8571          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8572          * of the client id contain the boot time for a NFS4 lock. So we
8573          * fabricate and identity by setting clientid to the sysid, and
8574          * the lock owner to the pid.
8575          */
8576         dp->owner.clientid = flk->l_sysid;
8577         len = sizeof (pid_t);
8578         dp->owner.owner_len = len;
8579         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8580         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8581 finish:
8582         dp->offset = flk->l_start;
8583         dp->length = flk->l_len;
8584 
8585         if (flk->l_type == F_RDLCK)
8586                 dp->locktype = READ_LT;
8587         else if (flk->l_type == F_WRLCK)
8588                 dp->locktype = WRITE_LT;
8589         else
8590                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8591 
8592         return (NFS4_OK);
8593 }
8594 
8595 /*
8596  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8597  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8598  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8599  * for that (obviously); they are sending the LOCK requests with some delays
8600  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8601  * locking and delay implementation at the client side.
8602  *
8603  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8604  * fast retries on its own (the for loop below) in a hope the lock will be
8605  * available soon.  And if not, the client won't need to resend the LOCK
8606  * requests so fast to check the lock availability.  This basically saves some
8607  * network traffic and tries to make sure the client gets the lock ASAP.
8608  */
8609 static int
8610 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8611 {
8612         int error;
8613         struct flock64 flk;
8614         int i;
8615         clock_t delaytime;
8616         int cmd;
8617         int spin_cnt = 0;
8618 
8619         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8620 retry:
8621         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8622 
8623         for (i = 0; i < rfs4_maxlock_tries; i++) {
8624                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8625                 error = VOP_FRLOCK(vp, cmd,
8626                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8627 
8628                 if (error != EAGAIN && error != EACCES)
8629                         break;
8630 
8631                 if (i < rfs4_maxlock_tries - 1) {
8632                         delay(delaytime);
8633                         delaytime *= 2;
8634                 }
8635         }
8636 
8637         if (error == EAGAIN || error == EACCES) {
8638                 /* Get the owner of the lock */
8639                 flk = *flock;
8640                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8641                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8642                     NULL) == 0) {
8643                         /*
8644                          * There's a race inherent in the current VOP_FRLOCK
8645                          * design where:
8646                          * a: "other guy" takes a lock that conflicts with a
8647                          * lock we want
8648                          * b: we attempt to take our lock (non-blocking) and
8649                          * the attempt fails.
8650                          * c: "other guy" releases the conflicting lock
8651                          * d: we ask what lock conflicts with the lock we want,
8652                          * getting F_UNLCK (no lock blocks us)
8653                          *
8654                          * If we retry the non-blocking lock attempt in this
8655                          * case (restart at step 'b') there's some possibility
8656                          * that many such attempts might fail.  However a test
8657                          * designed to actually provoke this race shows that
8658                          * the vast majority of cases require no retry, and
8659                          * only a few took as many as three retries.  Here's
8660                          * the test outcome:
8661                          *
8662                          *         number of retries    how many times we needed
8663                          *                              that many retries
8664                          *         0                    79461
8665                          *         1                      862
8666                          *         2                       49
8667                          *         3                        5
8668                          *
8669                          * Given those empirical results, we arbitrarily limit
8670                          * the retry count to ten.
8671                          *
8672                          * If we actually make to ten retries and give up,
8673                          * nothing catastrophic happens, but we're unable to
8674                          * return the information about the conflicting lock to
8675                          * the NFS client.  That's an acceptable trade off vs.
8676                          * letting this retry loop run forever.
8677                          */
8678                         if (flk.l_type == F_UNLCK) {
8679                                 if (spin_cnt++ < 10) {
8680                                         /* No longer locked, retry */
8681                                         goto retry;
8682                                 }
8683                         } else {
8684                                 *flock = flk;
8685                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8686                                     F_GETLK, &flk);
8687                         }
8688                 }
8689         }
8690 
8691         return (error);
8692 }
8693 
8694 /*ARGSUSED*/
8695 static nfsstat4
8696 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8697     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8698 {
8699         nfsstat4 status;
8700         rfs4_lockowner_t *lo = lsp->rls_locker;
8701         rfs4_state_t *sp = lsp->rls_state;
8702         struct flock64 flock;
8703         int16_t ltype;
8704         int flag;
8705         int error;
8706         sysid_t sysid;
8707         LOCK4res *lres;
8708         vnode_t *vp;
8709 
8710         if (rfs4_lease_expired(lo->rl_client)) {
8711                 return (NFS4ERR_EXPIRED);
8712         }
8713 
8714         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8715                 return (status);
8716 
8717         /* Check for zero length. To lock to end of file use all ones for V4 */
8718         if (length == 0)
8719                 return (NFS4ERR_INVAL);
8720         else if (length == (length4)(~0))
8721                 length = 0;             /* Posix to end of file  */
8722 
8723 retry:
8724         rfs4_dbe_lock(sp->rs_dbe);
8725         if (sp->rs_closed == TRUE) {
8726                 rfs4_dbe_unlock(sp->rs_dbe);
8727                 return (NFS4ERR_OLD_STATEID);
8728         }
8729 
8730         if (resop->resop != OP_LOCKU) {
8731                 switch (locktype) {
8732                 case READ_LT:
8733                 case READW_LT:
8734                         if ((sp->rs_share_access
8735                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8736                                 rfs4_dbe_unlock(sp->rs_dbe);
8737 
8738                                 return (NFS4ERR_OPENMODE);
8739                         }
8740                         ltype = F_RDLCK;
8741                         break;
8742                 case WRITE_LT:
8743                 case WRITEW_LT:
8744                         if ((sp->rs_share_access
8745                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8746                                 rfs4_dbe_unlock(sp->rs_dbe);
8747 
8748                                 return (NFS4ERR_OPENMODE);
8749                         }
8750                         ltype = F_WRLCK;
8751                         break;
8752                 }
8753         } else
8754                 ltype = F_UNLCK;
8755 
8756         flock.l_type = ltype;
8757         flock.l_whence = 0;             /* SEEK_SET */
8758         flock.l_start = offset;
8759         flock.l_len = length;
8760         flock.l_sysid = sysid;
8761         flock.l_pid = lsp->rls_locker->rl_pid;
8762 
8763         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8764         if (flock.l_len < 0 || flock.l_start < 0) {
8765                 rfs4_dbe_unlock(sp->rs_dbe);
8766                 return (NFS4ERR_INVAL);
8767         }
8768 
8769         /*
8770          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8771          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8772          */
8773         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8774 
8775         vp = sp->rs_finfo->rf_vp;
8776         VN_HOLD(vp);
8777 
8778         /*
8779          * We need to unlock sp before we call the underlying filesystem to
8780          * acquire the file lock.
8781          */
8782         rfs4_dbe_unlock(sp->rs_dbe);
8783 
8784         error = setlock(vp, &flock, flag, cred);
8785 
8786         /*
8787          * Make sure the file is still open.  In a case the file was closed in
8788          * the meantime, clean the lock we acquired using the setlock() call
8789          * above, and return the appropriate error.
8790          */
8791         rfs4_dbe_lock(sp->rs_dbe);
8792         if (sp->rs_closed == TRUE) {
8793                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8794                 rfs4_dbe_unlock(sp->rs_dbe);
8795 
8796                 VN_RELE(vp);
8797 
8798                 return (NFS4ERR_OLD_STATEID);
8799         }
8800         rfs4_dbe_unlock(sp->rs_dbe);
8801 
8802         VN_RELE(vp);
8803 
8804         if (error == 0) {
8805                 rfs4_dbe_lock(lsp->rls_dbe);
8806                 next_stateid(&lsp->rls_lockid);
8807                 rfs4_dbe_unlock(lsp->rls_dbe);
8808         }
8809 
8810         /*
8811          * N.B. We map error values to nfsv4 errors. This is differrent
8812          * than puterrno4 routine.
8813          */
8814         switch (error) {
8815         case 0:
8816                 status = NFS4_OK;
8817                 break;
8818         case EAGAIN:
8819         case EACCES:            /* Old value */
8820                 /* Can only get here if op is OP_LOCK */
8821                 ASSERT(resop->resop == OP_LOCK);
8822                 lres = &resop->nfs_resop4_u.oplock;
8823                 status = NFS4ERR_DENIED;
8824                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8825                     == NFS4ERR_EXPIRED)
8826                         goto retry;
8827                 break;
8828         case ENOLCK:
8829                 status = NFS4ERR_DELAY;
8830                 break;
8831         case EOVERFLOW:
8832                 status = NFS4ERR_INVAL;
8833                 break;
8834         case EINVAL:
8835                 status = NFS4ERR_NOTSUPP;
8836                 break;
8837         default:
8838                 status = NFS4ERR_SERVERFAULT;
8839                 break;
8840         }
8841 
8842         return (status);
8843 }
8844 
8845 /*ARGSUSED*/
8846 void
8847 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8848     struct svc_req *req, struct compound_state *cs)
8849 {
8850         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8851         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8852         nfsstat4 status;
8853         stateid4 *stateid;
8854         rfs4_lockowner_t *lo;
8855         rfs4_client_t *cp;
8856         rfs4_state_t *sp = NULL;
8857         rfs4_lo_state_t *lsp = NULL;
8858         bool_t ls_sw_held = FALSE;
8859         bool_t create = TRUE;
8860         bool_t lcreate = TRUE;
8861         bool_t dup_lock = FALSE;
8862         int rc;
8863 
8864         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8865             LOCK4args *, args);
8866 
8867         if (cs->vp == NULL) {
8868                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8869                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8870                     cs, LOCK4res *, resp);
8871                 return;
8872         }
8873 
8874         if (args->locker.new_lock_owner) {
8875                 /* Create a new lockowner for this instance */
8876                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8877 
8878                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8879 
8880                 stateid = &olo->open_stateid;
8881                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8882                 if (status != NFS4_OK) {
8883                         NFS4_DEBUG(rfs4_debug,
8884                             (CE_NOTE, "Get state failed in lock %d", status));
8885                         *cs->statusp = resp->status = status;
8886                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8887                             cs, LOCK4res *, resp);
8888                         return;
8889                 }
8890 
8891                 /* Ensure specified filehandle matches */
8892                 if (cs->vp != sp->rs_finfo->rf_vp) {
8893                         rfs4_state_rele(sp);
8894                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8895                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8896                             cs, LOCK4res *, resp);
8897                         return;
8898                 }
8899 
8900                 /* hold off other access to open_owner while we tinker */
8901                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8902 
8903                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8904                 case NFS4_CHECK_STATEID_OLD:
8905                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8906                         goto end;
8907                 case NFS4_CHECK_STATEID_BAD:
8908                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8909                         goto end;
8910                 case NFS4_CHECK_STATEID_EXPIRED:
8911                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8912                         goto end;
8913                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8914                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8915                         goto end;
8916                 case NFS4_CHECK_STATEID_CLOSED:
8917                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8918                         goto end;
8919                 case NFS4_CHECK_STATEID_OKAY:
8920                 case NFS4_CHECK_STATEID_REPLAY:
8921                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8922                             sp->rs_owner, resop)) {
8923                         case NFS4_CHKSEQ_OKAY:
8924                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8925                                         break;
8926                                 /*
8927                                  * This is replayed stateid; if seqid
8928                                  * matches next expected, then client
8929                                  * is using wrong seqid.
8930                                  */
8931                                 /* FALLTHROUGH */
8932                         case NFS4_CHKSEQ_BAD:
8933                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8934                                 goto end;
8935                         case NFS4_CHKSEQ_REPLAY:
8936                                 /* This is a duplicate LOCK request */
8937                                 dup_lock = TRUE;
8938 
8939                                 /*
8940                                  * For a duplicate we do not want to
8941                                  * create a new lockowner as it should
8942                                  * already exist.
8943                                  * Turn off the lockowner create flag.
8944                                  */
8945                                 lcreate = FALSE;
8946                         }
8947                         break;
8948                 }
8949 
8950                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8951                 if (lo == NULL) {
8952                         NFS4_DEBUG(rfs4_debug,
8953                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8954                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8955                         goto end;
8956                 }
8957 
8958                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8959                 if (lsp == NULL) {
8960                         rfs4_update_lease(sp->rs_owner->ro_client);
8961                         /*
8962                          * Only update theh open_seqid if this is not
8963                          * a duplicate request
8964                          */
8965                         if (dup_lock == FALSE) {
8966                                 rfs4_update_open_sequence(sp->rs_owner);
8967                         }
8968 
8969                         NFS4_DEBUG(rfs4_debug,
8970                             (CE_NOTE, "rfs4_op_lock: no state"));
8971                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8972                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8973                         rfs4_lockowner_rele(lo);
8974                         goto end;
8975                 }
8976 
8977                 /*
8978                  * This is the new_lock_owner branch and the client is
8979                  * supposed to be associating a new lock_owner with
8980                  * the open file at this point.  If we find that a
8981                  * lock_owner/state association already exists and a
8982                  * successful LOCK request was returned to the client,
8983                  * an error is returned to the client since this is
8984                  * not appropriate.  The client should be using the
8985                  * existing lock_owner branch.
8986                  */
8987                 if (dup_lock == FALSE && create == FALSE) {
8988                         if (lsp->rls_lock_completed == TRUE) {
8989                                 *cs->statusp =
8990                                     resp->status = NFS4ERR_BAD_SEQID;
8991                                 rfs4_lockowner_rele(lo);
8992                                 goto end;
8993                         }
8994                 }
8995 
8996                 rfs4_update_lease(sp->rs_owner->ro_client);
8997 
8998                 /*
8999                  * Only update theh open_seqid if this is not
9000                  * a duplicate request
9001                  */
9002                 if (dup_lock == FALSE) {
9003                         rfs4_update_open_sequence(sp->rs_owner);
9004                 }
9005 
9006                 /*
9007                  * If this is a duplicate lock request, just copy the
9008                  * previously saved reply and return.
9009                  */
9010                 if (dup_lock == TRUE) {
9011                         /* verify that lock_seqid's match */
9012                         if (lsp->rls_seqid != olo->lock_seqid) {
9013                                 NFS4_DEBUG(rfs4_debug,
9014                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9015                                     "lsp->seqid=%d old->seqid=%d",
9016                                     lsp->rls_seqid, olo->lock_seqid));
9017                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9018                         } else {
9019                                 rfs4_copy_reply(resop, &lsp->rls_reply);
9020                                 /*
9021                                  * Make sure to copy the just
9022                                  * retrieved reply status into the
9023                                  * overall compound status
9024                                  */
9025                                 *cs->statusp = resp->status;
9026                         }
9027                         rfs4_lockowner_rele(lo);
9028                         goto end;
9029                 }
9030 
9031                 rfs4_dbe_lock(lsp->rls_dbe);
9032 
9033                 /* Make sure to update the lock sequence id */
9034                 lsp->rls_seqid = olo->lock_seqid;
9035 
9036                 NFS4_DEBUG(rfs4_debug,
9037                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9038 
9039                 /*
9040                  * This is used to signify the newly created lockowner
9041                  * stateid and its sequence number.  The checks for
9042                  * sequence number and increment don't occur on the
9043                  * very first lock request for a lockowner.
9044                  */
9045                 lsp->rls_skip_seqid_check = TRUE;
9046 
9047                 /* hold off other access to lsp while we tinker */
9048                 rfs4_sw_enter(&lsp->rls_sw);
9049                 ls_sw_held = TRUE;
9050 
9051                 rfs4_dbe_unlock(lsp->rls_dbe);
9052 
9053                 rfs4_lockowner_rele(lo);
9054         } else {
9055                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9056                 /* get lsp and hold the lock on the underlying file struct */
9057                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9058                     != NFS4_OK) {
9059                         *cs->statusp = resp->status = status;
9060                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9061                             cs, LOCK4res *, resp);
9062                         return;
9063                 }
9064                 create = FALSE; /* We didn't create lsp */
9065 
9066                 /* Ensure specified filehandle matches */
9067                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9068                         rfs4_lo_state_rele(lsp, TRUE);
9069                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9070                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9071                             cs, LOCK4res *, resp);
9072                         return;
9073                 }
9074 
9075                 /* hold off other access to lsp while we tinker */
9076                 rfs4_sw_enter(&lsp->rls_sw);
9077                 ls_sw_held = TRUE;
9078 
9079                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9080                 /*
9081                  * The stateid looks like it was okay (expected to be
9082                  * the next one)
9083                  */
9084                 case NFS4_CHECK_STATEID_OKAY:
9085                         /*
9086                          * The sequence id is now checked.  Determine
9087                          * if this is a replay or if it is in the
9088                          * expected (next) sequence.  In the case of a
9089                          * replay, there are two replay conditions
9090                          * that may occur.  The first is the normal
9091                          * condition where a LOCK is done with a
9092                          * NFS4_OK response and the stateid is
9093                          * updated.  That case is handled below when
9094                          * the stateid is identified as a REPLAY.  The
9095                          * second is the case where an error is
9096                          * returned, like NFS4ERR_DENIED, and the
9097                          * sequence number is updated but the stateid
9098                          * is not updated.  This second case is dealt
9099                          * with here.  So it may seem odd that the
9100                          * stateid is okay but the sequence id is a
9101                          * replay but it is okay.
9102                          */
9103                         switch (rfs4_check_lock_seqid(
9104                             args->locker.locker4_u.lock_owner.lock_seqid,
9105                             lsp, resop)) {
9106                         case NFS4_CHKSEQ_REPLAY:
9107                                 if (resp->status != NFS4_OK) {
9108                                         /*
9109                                          * Here is our replay and need
9110                                          * to verify that the last
9111                                          * response was an error.
9112                                          */
9113                                         *cs->statusp = resp->status;
9114                                         goto end;
9115                                 }
9116                                 /*
9117                                  * This is done since the sequence id
9118                                  * looked like a replay but it didn't
9119                                  * pass our check so a BAD_SEQID is
9120                                  * returned as a result.
9121                                  */
9122                                 /*FALLTHROUGH*/
9123                         case NFS4_CHKSEQ_BAD:
9124                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9125                                 goto end;
9126                         case NFS4_CHKSEQ_OKAY:
9127                                 /* Everything looks okay move ahead */
9128                                 break;
9129                         }
9130                         break;
9131                 case NFS4_CHECK_STATEID_OLD:
9132                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9133                         goto end;
9134                 case NFS4_CHECK_STATEID_BAD:
9135                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9136                         goto end;
9137                 case NFS4_CHECK_STATEID_EXPIRED:
9138                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9139                         goto end;
9140                 case NFS4_CHECK_STATEID_CLOSED:
9141                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9142                         goto end;
9143                 case NFS4_CHECK_STATEID_REPLAY:
9144                         switch (rfs4_check_lock_seqid(
9145                             args->locker.locker4_u.lock_owner.lock_seqid,
9146                             lsp, resop)) {
9147                         case NFS4_CHKSEQ_OKAY:
9148                                 /*
9149                                  * This is a replayed stateid; if
9150                                  * seqid matches the next expected,
9151                                  * then client is using wrong seqid.
9152                                  */
9153                         case NFS4_CHKSEQ_BAD:
9154                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9155                                 goto end;
9156                         case NFS4_CHKSEQ_REPLAY:
9157                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9158                                 *cs->statusp = status = resp->status;
9159                                 goto end;
9160                         }
9161                         break;
9162                 default:
9163                         ASSERT(FALSE);
9164                         break;
9165                 }
9166 
9167                 rfs4_update_lock_sequence(lsp);
9168                 rfs4_update_lease(lsp->rls_locker->rl_client);
9169         }
9170 
9171         /*
9172          * NFS4 only allows locking on regular files, so
9173          * verify type of object.
9174          */
9175         if (cs->vp->v_type != VREG) {
9176                 if (cs->vp->v_type == VDIR)
9177                         status = NFS4ERR_ISDIR;
9178                 else
9179                         status = NFS4ERR_INVAL;
9180                 goto out;
9181         }
9182 
9183         cp = lsp->rls_state->rs_owner->ro_client;
9184 
9185         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9186                 status = NFS4ERR_GRACE;
9187                 goto out;
9188         }
9189 
9190         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9191                 status = NFS4ERR_NO_GRACE;
9192                 goto out;
9193         }
9194 
9195         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9196                 status = NFS4ERR_NO_GRACE;
9197                 goto out;
9198         }
9199 
9200         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9201                 cs->deleg = TRUE;
9202 
9203         status = rfs4_do_lock(lsp, args->locktype,
9204             args->offset, args->length, cs->cr, resop);
9205 
9206 out:
9207         lsp->rls_skip_seqid_check = FALSE;
9208 
9209         *cs->statusp = resp->status = status;
9210 
9211         if (status == NFS4_OK) {
9212                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9213                 lsp->rls_lock_completed = TRUE;
9214         }
9215         /*
9216          * Only update the "OPEN" response here if this was a new
9217          * lock_owner
9218          */
9219         if (sp)
9220                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9221 
9222         rfs4_update_lock_resp(lsp, resop);
9223 
9224 end:
9225         if (lsp) {
9226                 if (ls_sw_held)
9227                         rfs4_sw_exit(&lsp->rls_sw);
9228                 /*
9229                  * If an sp obtained, then the lsp does not represent
9230                  * a lock on the file struct.
9231                  */
9232                 if (sp != NULL)
9233                         rfs4_lo_state_rele(lsp, FALSE);
9234                 else
9235                         rfs4_lo_state_rele(lsp, TRUE);
9236         }
9237         if (sp) {
9238                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9239                 rfs4_state_rele(sp);
9240         }
9241 
9242         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9243             LOCK4res *, resp);
9244 }
9245 
9246 /* free function for LOCK/LOCKT */
9247 static void
9248 lock_denied_free(nfs_resop4 *resop)
9249 {
9250         LOCK4denied *dp = NULL;
9251 
9252         switch (resop->resop) {
9253         case OP_LOCK:
9254                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9255                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9256                 break;
9257         case OP_LOCKT:
9258                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9259                         dp = &resop->nfs_resop4_u.oplockt.denied;
9260                 break;
9261         default:
9262                 break;
9263         }
9264 
9265         if (dp)
9266                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9267 }
9268 
9269 /*ARGSUSED*/
9270 void
9271 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9272     struct svc_req *req, struct compound_state *cs)
9273 {
9274         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9275         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9276         nfsstat4 status;
9277         stateid4 *stateid = &args->lock_stateid;
9278         rfs4_lo_state_t *lsp;
9279 
9280         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9281             LOCKU4args *, args);
9282 
9283         if (cs->vp == NULL) {
9284                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9285                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9286                     LOCKU4res *, resp);
9287                 return;
9288         }
9289 
9290         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9291                 *cs->statusp = resp->status = status;
9292                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9293                     LOCKU4res *, resp);
9294                 return;
9295         }
9296 
9297         /* Ensure specified filehandle matches */
9298         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9299                 rfs4_lo_state_rele(lsp, TRUE);
9300                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9301                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9302                     LOCKU4res *, resp);
9303                 return;
9304         }
9305 
9306         /* hold off other access to lsp while we tinker */
9307         rfs4_sw_enter(&lsp->rls_sw);
9308 
9309         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9310         case NFS4_CHECK_STATEID_OKAY:
9311                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9312                     != NFS4_CHKSEQ_OKAY) {
9313                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9314                         goto end;
9315                 }
9316                 break;
9317         case NFS4_CHECK_STATEID_OLD:
9318                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9319                 goto end;
9320         case NFS4_CHECK_STATEID_BAD:
9321                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9322                 goto end;
9323         case NFS4_CHECK_STATEID_EXPIRED:
9324                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9325                 goto end;
9326         case NFS4_CHECK_STATEID_CLOSED:
9327                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9328                 goto end;
9329         case NFS4_CHECK_STATEID_REPLAY:
9330                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9331                 case NFS4_CHKSEQ_OKAY:
9332                                 /*
9333                                  * This is a replayed stateid; if
9334                                  * seqid matches the next expected,
9335                                  * then client is using wrong seqid.
9336                                  */
9337                 case NFS4_CHKSEQ_BAD:
9338                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9339                         goto end;
9340                 case NFS4_CHKSEQ_REPLAY:
9341                         rfs4_update_lease(lsp->rls_locker->rl_client);
9342                         *cs->statusp = status = resp->status;
9343                         goto end;
9344                 }
9345                 break;
9346         default:
9347                 ASSERT(FALSE);
9348                 break;
9349         }
9350 
9351         rfs4_update_lock_sequence(lsp);
9352         rfs4_update_lease(lsp->rls_locker->rl_client);
9353 
9354         /*
9355          * NFS4 only allows locking on regular files, so
9356          * verify type of object.
9357          */
9358         if (cs->vp->v_type != VREG) {
9359                 if (cs->vp->v_type == VDIR)
9360                         status = NFS4ERR_ISDIR;
9361                 else
9362                         status = NFS4ERR_INVAL;
9363                 goto out;
9364         }
9365 
9366         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9367                 status = NFS4ERR_GRACE;
9368                 goto out;
9369         }
9370 
9371         status = rfs4_do_lock(lsp, args->locktype,
9372             args->offset, args->length, cs->cr, resop);
9373 
9374 out:
9375         *cs->statusp = resp->status = status;
9376 
9377         if (status == NFS4_OK)
9378                 resp->lock_stateid = lsp->rls_lockid.stateid;
9379 
9380         rfs4_update_lock_resp(lsp, resop);
9381 
9382 end:
9383         rfs4_sw_exit(&lsp->rls_sw);
9384         rfs4_lo_state_rele(lsp, TRUE);
9385 
9386         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9387             LOCKU4res *, resp);
9388 }
9389 
9390 /*
9391  * LOCKT is a best effort routine, the client can not be guaranteed that
9392  * the status return is still in effect by the time the reply is received.
9393  * They are numerous race conditions in this routine, but we are not required
9394  * and can not be accurate.
9395  */
9396 /*ARGSUSED*/
9397 void
9398 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9399     struct svc_req *req, struct compound_state *cs)
9400 {
9401         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9402         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9403         rfs4_lockowner_t *lo;
9404         rfs4_client_t *cp;
9405         bool_t create = FALSE;
9406         struct flock64 flk;
9407         int error;
9408         int flag = FREAD | FWRITE;
9409         int ltype;
9410         length4 posix_length;
9411         sysid_t sysid;
9412         pid_t pid;
9413 
9414         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9415             LOCKT4args *, args);
9416 
9417         if (cs->vp == NULL) {
9418                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9419                 goto out;
9420         }
9421 
9422         /*
9423          * NFS4 only allows locking on regular files, so
9424          * verify type of object.
9425          */
9426         if (cs->vp->v_type != VREG) {
9427                 if (cs->vp->v_type == VDIR)
9428                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9429                 else
9430                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9431                 goto out;
9432         }
9433 
9434         /*
9435          * Check out the clientid to ensure the server knows about it
9436          * so that we correctly inform the client of a server reboot.
9437          */
9438         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9439             == NULL) {
9440                 *cs->statusp = resp->status =
9441                     rfs4_check_clientid(&args->owner.clientid, 0);
9442                 goto out;
9443         }
9444         if (rfs4_lease_expired(cp)) {
9445                 rfs4_client_close(cp);
9446                 /*
9447                  * Protocol doesn't allow returning NFS4ERR_STALE as
9448                  * other operations do on this check so STALE_CLIENTID
9449                  * is returned instead
9450                  */
9451                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9452                 goto out;
9453         }
9454 
9455         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9456                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9457                 rfs4_client_rele(cp);
9458                 goto out;
9459         }
9460         rfs4_client_rele(cp);
9461 
9462         resp->status = NFS4_OK;
9463 
9464         switch (args->locktype) {
9465         case READ_LT:
9466         case READW_LT:
9467                 ltype = F_RDLCK;
9468                 break;
9469         case WRITE_LT:
9470         case WRITEW_LT:
9471                 ltype = F_WRLCK;
9472                 break;
9473         }
9474 
9475         posix_length = args->length;
9476         /* Check for zero length. To lock to end of file use all ones for V4 */
9477         if (posix_length == 0) {
9478                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9479                 goto out;
9480         } else if (posix_length == (length4)(~0)) {
9481                 posix_length = 0;       /* Posix to end of file  */
9482         }
9483 
9484         /* Find or create a lockowner */
9485         lo = rfs4_findlockowner(&args->owner, &create);
9486 
9487         if (lo) {
9488                 pid = lo->rl_pid;
9489                 if ((resp->status =
9490                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9491                         goto err;
9492         } else {
9493                 pid = 0;
9494                 sysid = lockt_sysid;
9495         }
9496 retry:
9497         flk.l_type = ltype;
9498         flk.l_whence = 0;               /* SEEK_SET */
9499         flk.l_start = args->offset;
9500         flk.l_len = posix_length;
9501         flk.l_sysid = sysid;
9502         flk.l_pid = pid;
9503         flag |= F_REMOTELOCK;
9504 
9505         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9506 
9507         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9508         if (flk.l_len < 0 || flk.l_start < 0) {
9509                 resp->status = NFS4ERR_INVAL;
9510                 goto err;
9511         }
9512         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9513             NULL, cs->cr, NULL);
9514 
9515         /*
9516          * N.B. We map error values to nfsv4 errors. This is differrent
9517          * than puterrno4 routine.
9518          */
9519         switch (error) {
9520         case 0:
9521                 if (flk.l_type == F_UNLCK)
9522                         resp->status = NFS4_OK;
9523                 else {
9524                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9525                                 goto retry;
9526                         resp->status = NFS4ERR_DENIED;
9527                 }
9528                 break;
9529         case EOVERFLOW:
9530                 resp->status = NFS4ERR_INVAL;
9531                 break;
9532         case EINVAL:
9533                 resp->status = NFS4ERR_NOTSUPP;
9534                 break;
9535         default:
9536                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9537                     error);
9538                 resp->status = NFS4ERR_SERVERFAULT;
9539                 break;
9540         }
9541 
9542 err:
9543         if (lo)
9544                 rfs4_lockowner_rele(lo);
9545         *cs->statusp = resp->status;
9546 out:
9547         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9548             LOCKT4res *, resp);
9549 }
9550 
9551 int
9552 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9553 {
9554         int err;
9555         int cmd;
9556         vnode_t *vp;
9557         struct shrlock shr;
9558         struct shr_locowner shr_loco;
9559         int fflags = 0;
9560 
9561         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9562         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9563 
9564         if (sp->rs_closed)
9565                 return (NFS4ERR_OLD_STATEID);
9566 
9567         vp = sp->rs_finfo->rf_vp;
9568         ASSERT(vp);
9569 
9570         shr.s_access = shr.s_deny = 0;
9571 
9572         if (access & OPEN4_SHARE_ACCESS_READ) {
9573                 fflags |= FREAD;
9574                 shr.s_access |= F_RDACC;
9575         }
9576         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9577                 fflags |= FWRITE;
9578                 shr.s_access |= F_WRACC;
9579         }
9580         ASSERT(shr.s_access);
9581 
9582         if (deny & OPEN4_SHARE_DENY_READ)
9583                 shr.s_deny |= F_RDDNY;
9584         if (deny & OPEN4_SHARE_DENY_WRITE)
9585                 shr.s_deny |= F_WRDNY;
9586 
9587         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9588         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9589         shr_loco.sl_pid = shr.s_pid;
9590         shr_loco.sl_id = shr.s_sysid;
9591         shr.s_owner = (caddr_t)&shr_loco;
9592         shr.s_own_len = sizeof (shr_loco);
9593 
9594         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9595 
9596         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9597         if (err != 0) {
9598                 if (err == EAGAIN)
9599                         err = NFS4ERR_SHARE_DENIED;
9600                 else
9601                         err = puterrno4(err);
9602                 return (err);
9603         }
9604 
9605         sp->rs_share_access |= access;
9606         sp->rs_share_deny |= deny;
9607 
9608         return (0);
9609 }
9610 
9611 int
9612 rfs4_unshare(rfs4_state_t *sp)
9613 {
9614         int err;
9615         struct shrlock shr;
9616         struct shr_locowner shr_loco;
9617 
9618         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9619 
9620         if (sp->rs_closed || sp->rs_share_access == 0)
9621                 return (0);
9622 
9623         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9624         ASSERT(sp->rs_finfo->rf_vp);
9625 
9626         shr.s_access = shr.s_deny = 0;
9627         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9628         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9629         shr_loco.sl_pid = shr.s_pid;
9630         shr_loco.sl_id = shr.s_sysid;
9631         shr.s_owner = (caddr_t)&shr_loco;
9632         shr.s_own_len = sizeof (shr_loco);
9633 
9634         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9635             NULL);
9636         if (err != 0) {
9637                 err = puterrno4(err);
9638                 return (err);
9639         }
9640 
9641         sp->rs_share_access = 0;
9642         sp->rs_share_deny = 0;
9643 
9644         return (0);
9645 
9646 }
9647 
9648 static int
9649 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9650 {
9651         struct clist    *wcl;
9652         count4          count = rok->data_len;
9653         int             wlist_len;
9654 
9655         wcl = args->wlist;
9656         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9657                 return (FALSE);
9658         }
9659         wcl = args->wlist;
9660         rok->wlist_len = wlist_len;
9661         rok->wlist = wcl;
9662         return (TRUE);
9663 }
9664 
/*
 * Tunable: set to a non-zero value to disable server referrals.
 * While it is non-zero, vn_is_nfs_reparse() reports B_FALSE for every
 * vnode without examining it.
 */
int rfs4_no_referrals = 0;
9667 
9668 /*
9669  * Find an NFS record in reparse point data.
9670  * Returns 0 for success and <0 or an errno value on failure.
9671  */
9672 int
9673 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9674 {
9675         int err;
9676         char *stype, *val;
9677         nvlist_t *nvl;
9678         nvpair_t *curr;
9679 
9680         if ((nvl = reparse_init()) == NULL)
9681                 return (-1);
9682 
9683         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9684                 reparse_free(nvl);
9685                 return (err);
9686         }
9687 
9688         curr = NULL;
9689         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9690                 if ((stype = nvpair_name(curr)) == NULL) {
9691                         reparse_free(nvl);
9692                         return (-2);
9693                 }
9694                 if (strncasecmp(stype, "NFS", 3) == 0)
9695                         break;
9696         }
9697 
9698         if ((curr == NULL) ||
9699             (nvpair_value_string(curr, &val))) {
9700                 reparse_free(nvl);
9701                 return (-3);
9702         }
9703         *nvlp = nvl;
9704         *svcp = stype;
9705         *datap = val;
9706         return (0);
9707 }
9708 
9709 int
9710 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9711 {
9712         nvlist_t *nvl;
9713         char *s, *d;
9714 
9715         if (rfs4_no_referrals != 0)
9716                 return (B_FALSE);
9717 
9718         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9719                 return (B_FALSE);
9720 
9721         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9722                 return (B_FALSE);
9723 
9724         reparse_free(nvl);
9725 
9726         return (B_TRUE);
9727 }
9728 
9729 /*
9730  * There is a user-level copy of this routine in ref_subr.c.
9731  * Changes should be kept in sync.
9732  */
9733 static int
9734 nfs4_create_components(char *path, component4 *comp4)
9735 {
9736         int slen, plen, ncomp;
9737         char *ori_path, *nxtc, buf[MAXNAMELEN];
9738 
9739         if (path == NULL)
9740                 return (0);
9741 
9742         plen = strlen(path) + 1;        /* include the terminator */
9743         ori_path = path;
9744         ncomp = 0;
9745 
9746         /* count number of components in the path */
9747         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9748                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9749                         if ((slen = nxtc - path) == 0) {
9750                                 path = nxtc + 1;
9751                                 continue;
9752                         }
9753 
9754                         if (comp4 != NULL) {
9755                                 bcopy(path, buf, slen);
9756                                 buf[slen] = '\0';
9757                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9758                         }
9759 
9760                         ncomp++;        /* 1 valid component */
9761                         path = nxtc + 1;
9762                 }
9763                 if (*nxtc == '\0' || *nxtc == '\n')
9764                         break;
9765         }
9766 
9767         return (ncomp);
9768 }
9769 
9770 /*
9771  * There is a user-level copy of this routine in ref_subr.c.
9772  * Changes should be kept in sync.
9773  */
9774 static int
9775 make_pathname4(char *path, pathname4 *pathname)
9776 {
9777         int ncomp;
9778         component4 *comp4;
9779 
9780         if (pathname == NULL)
9781                 return (0);
9782 
9783         if (path == NULL) {
9784                 pathname->pathname4_val = NULL;
9785                 pathname->pathname4_len = 0;
9786                 return (0);
9787         }
9788 
9789         /* count number of components to alloc buffer */
9790         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9791                 pathname->pathname4_val = NULL;
9792                 pathname->pathname4_len = 0;
9793                 return (0);
9794         }
9795         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9796 
9797         /* copy components into allocated buffer */
9798         ncomp = nfs4_create_components(path, comp4);
9799 
9800         pathname->pathname4_val = comp4;
9801         pathname->pathname4_len = ncomp;
9802 
9803         return (ncomp);
9804 }
9805 
9806 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9807 
9808 fs_locations4 *
9809 fetch_referral(vnode_t *vp, cred_t *cr)
9810 {
9811         nvlist_t *nvl;
9812         char *stype, *sdata;
9813         fs_locations4 *result;
9814         char buf[1024];
9815         size_t bufsize;
9816         XDR xdr;
9817         int err;
9818 
9819         /*
9820          * Check attrs to ensure it's a reparse point
9821          */
9822         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9823                 return (NULL);
9824 
9825         /*
9826          * Look for an NFS record and get the type and data
9827          */
9828         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9829                 return (NULL);
9830 
9831         /*
9832          * With the type and data, upcall to get the referral
9833          */
9834         bufsize = sizeof (buf);
9835         bzero(buf, sizeof (buf));
9836         err = reparse_kderef((const char *)stype, (const char *)sdata,
9837             buf, &bufsize);
9838         reparse_free(nvl);
9839 
9840         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9841             char *, stype, char *, sdata, char *, buf, int, err);
9842         if (err) {
9843                 cmn_err(CE_NOTE,
9844                     "reparsed daemon not running: unable to get referral (%d)",
9845                     err);
9846                 return (NULL);
9847         }
9848 
9849         /*
9850          * We get an XDR'ed record back from the kderef call
9851          */
9852         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9853         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9854         err = xdr_fs_locations4(&xdr, result);
9855         XDR_DESTROY(&xdr);
9856         if (err != TRUE) {
9857                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9858                     int, err);
9859                 return (NULL);
9860         }
9861 
9862         /*
9863          * Look at path to recover fs_root, ignoring the leading '/'
9864          */
9865         (void) make_pathname4(vp->v_path, &result->fs_root);
9866 
9867         return (result);
9868 }
9869 
9870 char *
9871 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9872 {
9873         fs_locations4 *fsl;
9874         fs_location4 *fs;
9875         char *server, *path, *symbuf;
9876         static char *prefix = "/net/";
9877         int i, size, npaths;
9878         uint_t len;
9879 
9880         /* Get the referral */
9881         if ((fsl = fetch_referral(vp, cr)) == NULL)
9882                 return (NULL);
9883 
9884         /* Deal with only the first location and first server */
9885         fs = &fsl->locations_val[0];
9886         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9887         if (server == NULL) {
9888                 rfs4_free_fs_locations4(fsl);
9889                 kmem_free(fsl, sizeof (fs_locations4));
9890                 return (NULL);
9891         }
9892 
9893         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9894         size = strlen(prefix) + len;
9895         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9896                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9897 
9898         /* Allocate the symlink buffer and fill it */
9899         symbuf = kmem_zalloc(size, KM_SLEEP);
9900         (void) strcat(symbuf, prefix);
9901         (void) strcat(symbuf, server);
9902         kmem_free(server, len);
9903 
9904         npaths = 0;
9905         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9906                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9907                 if (path == NULL)
9908                         continue;
9909                 (void) strcat(symbuf, "/");
9910                 (void) strcat(symbuf, path);
9911                 npaths++;
9912                 kmem_free(path, len);
9913         }
9914 
9915         rfs4_free_fs_locations4(fsl);
9916         kmem_free(fsl, sizeof (fs_locations4));
9917 
9918         if (strsz != NULL)
9919                 *strsz = size;
9920         return (symbuf);
9921 }
9922 
9923 /*
9924  * Check to see if we have a downrev Solaris client, so that we
9925  * can send it a symlink instead of a referral.
9926  */
9927 int
9928 client_is_downrev(struct svc_req *req)
9929 {
9930         struct sockaddr *ca;
9931         rfs4_clntip_t *ci;
9932         bool_t create = FALSE;
9933         int is_downrev;
9934 
9935         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9936         ASSERT(ca);
9937         ci = rfs4_find_clntip(ca, &create);
9938         if (ci == NULL)
9939                 return (0);
9940         is_downrev = ci->ri_no_referrals;
9941         rfs4_dbe_rele(ci->ri_dbe);
9942         return (is_downrev);
9943 }
9944 
9945 /*
9946  * Do the main work of handling HA-NFSv4 Resource Group failover on
9947  * Sun Cluster.
9948  * We need to detect whether any RG admin paths have been added or removed,
9949  * and adjust resources accordingly.
9950  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9951  * order to scale, the list and array of paths need to be held in more
9952  * suitable data structures.
9953  */
9954 static void
9955 hanfsv4_failover(nfs4_srv_t *nsrv4)
9956 {
9957         int i, start_grace, numadded_paths = 0;
9958         char **added_paths = NULL;
9959         rfs4_dss_path_t *dss_path;
9960 
9961         /*
9962          * Note: currently, dss_pathlist cannot be NULL, since
9963          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9964          * make the latter dynamically specified too, the following will
9965          * need to be adjusted.
9966          */
9967 
9968         /*
9969          * First, look for removed paths: RGs that have been failed-over
9970          * away from this node.
9971          * Walk the "currently-serving" dss_pathlist and, for each
9972          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9973          * from nfsd. If not, that RG path has been removed.
9974          *
9975          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9976          * any duplicates.
9977          */
9978         dss_path = nsrv4->dss_pathlist;
9979         do {
9980                 int found = 0;
9981                 char *path = dss_path->path;
9982 
9983                 /* used only for non-HA so may not be removed */
9984                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9985                         dss_path = dss_path->next;
9986                         continue;
9987                 }
9988 
9989                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9990                         int cmpret;
9991                         char *newpath = rfs4_dss_newpaths[i];
9992 
9993                         /*
9994                          * Since nfsd has sorted rfs4_dss_newpaths for us,
9995                          * once the return from strcmp is negative we know
9996                          * we've passed the point where "path" should be,
9997                          * and can stop searching: "path" has been removed.
9998                          */
9999                         cmpret = strcmp(path, newpath);
10000                         if (cmpret < 0)
10001                                 break;
10002                         if (cmpret == 0) {
10003                                 found = 1;
10004                                 break;
10005                         }
10006                 }
10007 
10008                 if (found == 0) {
10009                         unsigned index = dss_path->index;
10010                         rfs4_servinst_t *sip = dss_path->sip;
10011                         rfs4_dss_path_t *path_next = dss_path->next;
10012 
10013                         /*
10014                          * This path has been removed.
10015                          * We must clear out the servinst reference to
10016                          * it, since it's now owned by another
10017                          * node: we should not attempt to touch it.
10018                          */
10019                         ASSERT(dss_path == sip->dss_paths[index]);
10020                         sip->dss_paths[index] = NULL;
10021 
10022                         /* remove from "currently-serving" list, and destroy */
10023                         remque(dss_path);
10024                         /* allow for NUL */
10025                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10026                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10027 
10028                         dss_path = path_next;
10029                 } else {
10030                         /* path was found; not removed */
10031                         dss_path = dss_path->next;
10032                 }
10033         } while (dss_path != nsrv4->dss_pathlist);
10034 
10035         /*
10036          * Now, look for added paths: RGs that have been failed-over
10037          * to this node.
10038          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10039          * for each path, check if it is on the "currently-serving"
10040          * dss_pathlist. If not, that RG path has been added.
10041          *
10042          * Note: we don't do duplicate detection here; nfsd does that for us.
10043          *
10044          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10045          * an upper bound for the size needed for added_paths[numadded_paths].
10046          */
10047 
10048         /* probably more space than we need, but guaranteed to be enough */
10049         if (rfs4_dss_numnewpaths > 0) {
10050                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10051                 added_paths = kmem_zalloc(sz, KM_SLEEP);
10052         }
10053 
10054         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10055         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10056                 int found = 0;
10057                 char *newpath = rfs4_dss_newpaths[i];
10058 
10059                 dss_path = nsrv4->dss_pathlist;
10060                 do {
10061                         char *path = dss_path->path;
10062 
10063                         /* used only for non-HA */
10064                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10065                                 dss_path = dss_path->next;
10066                                 continue;
10067                         }
10068 
10069                         if (strncmp(path, newpath, strlen(path)) == 0) {
10070                                 found = 1;
10071                                 break;
10072                         }
10073 
10074                         dss_path = dss_path->next;
10075                 } while (dss_path != nsrv4->dss_pathlist);
10076 
10077                 if (found == 0) {
10078                         added_paths[numadded_paths] = newpath;
10079                         numadded_paths++;
10080                 }
10081         }
10082 
10083         /* did we find any added paths? */
10084         if (numadded_paths > 0) {
10085 
10086                 /* create a new server instance, and start its grace period */
10087                 start_grace = 1;
10088                 /* CSTYLED */
10089                 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10090 
10091                 /* read in the stable storage state from these paths */
10092                 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10093 
10094                 /*
10095                  * Multiple failovers during a grace period will cause
10096                  * clients of the same resource group to be partitioned
10097                  * into different server instances, with different
10098                  * grace periods.  Since clients of the same resource
10099                  * group must be subject to the same grace period,
10100                  * we need to reset all currently active grace periods.
10101                  */
10102                 rfs4_grace_reset_all(nsrv4);
10103         }
10104 
10105         if (rfs4_dss_numnewpaths > 0)
10106                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10107 }