1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30 
  31 /*
  32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33  * Copyright 2019 Nexenta Systems, Inc.
  34  * Copyright 2019 Nexenta by DDN, Inc.
  35  */
  36 
  37 #include <sys/param.h>
  38 #include <sys/types.h>
  39 #include <sys/systm.h>
  40 #include <sys/cred.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 #include <sys/vnode.h>
  45 #include <sys/uio.h>
  46 #include <sys/errno.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/statvfs.h>
  49 #include <sys/kmem.h>
  50 #include <sys/dirent.h>
  51 #include <sys/cmn_err.h>
  52 #include <sys/debug.h>
  53 #include <sys/systeminfo.h>
  54 #include <sys/flock.h>
  55 #include <sys/pathname.h>
  56 #include <sys/nbmlock.h>
  57 #include <sys/share.h>
  58 #include <sys/atomic.h>
  59 #include <sys/policy.h>
  60 #include <sys/fem.h>
  61 #include <sys/sdt.h>
  62 #include <sys/ddi.h>
  63 #include <sys/zone.h>
  64 
  65 #include <fs/fs_reparse.h>
  66 
  67 #include <rpc/types.h>
  68 #include <rpc/auth.h>
  69 #include <rpc/rpcsec_gss.h>
  70 #include <rpc/svc.h>
  71 
  72 #include <nfs/nfs.h>
  73 #include <nfs/nfssys.h>
  74 #include <nfs/export.h>
  75 #include <nfs/nfs_cmd.h>
  76 #include <nfs/lm.h>
  77 #include <nfs/nfs4.h>
  78 #include <nfs/nfs4_drc.h>
  79 
  80 #include <sys/strsubr.h>
  81 #include <sys/strsun.h>
  82 
  83 #include <inet/common.h>
  84 #include <inet/ip.h>
  85 #include <inet/ip6.h>
  86 
  87 #include <sys/tsol/label.h>
  88 #include <sys/tsol/tndb.h>
  89 
  90 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  92 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  93 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  94 extern struct svc_ops rdma_svc_ops;
  95 extern int nfs_loaned_buffers;
  96 /* End of Tunables */
  97 
  98 static int rdma_setup_read_data4(READ4args *, READ4res *);
  99 
 100 /*
 101  * Used to bump the stateid4.seqid value and show changes in the stateid
 102  */
 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
 104 
 105 /*
 106  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  *      maxcount that isn't large enough to hold the smallest possible
 109  *      XDR encoded dirent.
 110  *
 111  *          sizeof cookie (8 bytes) +
 112  *          sizeof name_len (4 bytes) +
 113  *          sizeof smallest (padded) name (4 bytes) +
 114  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  *          sizeof attrlist4_len (4 bytes) +
 116  *          sizeof next boolean (4 bytes)
 117  *
 118  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  * the smallest possible entry4 (assumes no attrs requested).
 120  *      sizeof nfsstat4 (4 bytes) +
 121  *      sizeof verifier4 (8 bytes) +
 122  *      sizeof entry4list bool (4 bytes) +
 123  *      sizeof entry4 (36 bytes) +
 124  *      sizeof eof bool (4 bytes)
 125  *
 126  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  *      required for a given name length.  MAXNAMELEN is the maximum
 130  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  *      to hold ., .., and the largest possible solaris dirent64.
 134  */
 135 #define RFS4_MINLEN_ENTRY4 36
 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137 #define RFS4_MINLEN_RDDIR_BUF \
 138         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139 
 140 /*
 141  * It would be better to pad to 4 bytes since that's what XDR would do,
 142  * but the dirents UFS gives us are already padded to 8, so just take
 143  * what we're given.  Dircount is only a hint anyway.  Currently the
 144  * solaris kernel is ASCII only, so there's no point in calling the
 145  * UTF8 functions.
 146  *
 147  * dirent64: named padded to provide 8 byte struct alignment
 148  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  *
 150  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  *
 152  */
 153 #define DIRENT64_TO_DIRCOUNT(dp) \
 154         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155 
 156 
 157 static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 158 
 159 u_longlong_t    nfs4_srv_caller_id;
 160 uint_t          nfs4_srv_vkey = 0;
 161 
 162 void    rfs4_init_compound_state(struct compound_state *);
 163 
 164 static void     nullfree(caddr_t);
 165 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                     struct compound_state *);
 167 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                     struct compound_state *);
 169 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170                     struct compound_state *);
 171 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172                     struct compound_state *);
 173 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 174                     struct compound_state *);
 175 static void     rfs4_op_create_free(nfs_resop4 *resop);
 176 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 177                     struct svc_req *, struct compound_state *);
 178 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 179                     struct svc_req *, struct compound_state *);
 180 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181                     struct compound_state *);
 182 static void     rfs4_op_getattr_free(nfs_resop4 *);
 183 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 184                     struct compound_state *);
 185 static void     rfs4_op_getfh_free(nfs_resop4 *);
 186 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187                     struct compound_state *);
 188 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189                     struct compound_state *);
 190 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 191                     struct compound_state *);
 192 static void     lock_denied_free(nfs_resop4 *);
 193 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194                     struct compound_state *);
 195 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196                     struct compound_state *);
 197 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198                     struct compound_state *);
 199 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 200                     struct compound_state *);
 201 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 202                     struct svc_req *req, struct compound_state *cs);
 203 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204                     struct compound_state *);
 205 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 206                     struct compound_state *);
 207 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 208                     struct svc_req *, struct compound_state *);
 209 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 210                     struct svc_req *, struct compound_state *);
 211 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212                     struct compound_state *);
 213 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214                     struct compound_state *);
 215 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216                     struct compound_state *);
 217 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 218                     struct compound_state *);
 219 static void     rfs4_op_read_free(nfs_resop4 *);
 220 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 221 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 222                     struct compound_state *);
 223 static void     rfs4_op_readlink_free(nfs_resop4 *);
 224 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 225                     struct svc_req *, struct compound_state *);
 226 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227                     struct compound_state *);
 228 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229                     struct compound_state *);
 230 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                     struct compound_state *);
 232 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                     struct compound_state *);
 234 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                     struct compound_state *);
 236 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237                     struct compound_state *);
 238 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239                     struct compound_state *);
 240 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 241                     struct compound_state *);
 242 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 243                     struct svc_req *, struct compound_state *);
 244 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 245                     struct svc_req *req, struct compound_state *);
 246 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 247                     struct compound_state *);
 248 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 249 
 250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 251                     struct svc_req *);
 252 nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 253 void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 254 
 255 
 256 /*
 257  * translation table for attrs
 258  */
 259 struct nfs4_ntov_table {
 260         union nfs4_attr_u *na;
 261         uint8_t amap[NFS4_MAXNUM_ATTRS];
 262         int attrcnt;
 263         bool_t vfsstat;
 264 };
 265 
 266 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 267 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 268                     struct nfs4_svgetit_arg *sargp);
 269 
 270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 271                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 272                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 273 
 274 static void     hanfsv4_failover(nfs4_srv_t *);
 275 
 276 fem_t           *deleg_rdops;
 277 fem_t           *deleg_wrops;
 278 
 279 /*
 280  * NFS4 op dispatch table
 281  */
 282 
 283 struct rfsv4disp {
 284         void    (*dis_proc)();          /* proc to call */
 285         void    (*dis_resfree)();       /* frees space allocated by proc */
 286         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 287 };
 288 
 289 static struct rfsv4disp rfsv4disptab[] = {
 290         /*
 291          * NFS VERSION 4
 292          */
 293 
 294         /* RFS_NULL = 0 */
 295         {rfs4_op_illegal, nullfree, 0},
 296 
 297         /* UNUSED = 1 */
 298         {rfs4_op_illegal, nullfree, 0},
 299 
 300         /* UNUSED = 2 */
 301         {rfs4_op_illegal, nullfree, 0},
 302 
 303         /* OP_ACCESS = 3 */
 304         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 305 
 306         /* OP_CLOSE = 4 */
 307         {rfs4_op_close, nullfree, 0},
 308 
 309         /* OP_COMMIT = 5 */
 310         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 311 
 312         /* OP_CREATE = 6 */
 313         {rfs4_op_create, nullfree, 0},
 314 
 315         /* OP_DELEGPURGE = 7 */
 316         {rfs4_op_delegpurge, nullfree, 0},
 317 
 318         /* OP_DELEGRETURN = 8 */
 319         {rfs4_op_delegreturn, nullfree, 0},
 320 
 321         /* OP_GETATTR = 9 */
 322         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 323 
 324         /* OP_GETFH = 10 */
 325         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 326 
 327         /* OP_LINK = 11 */
 328         {rfs4_op_link, nullfree, 0},
 329 
 330         /* OP_LOCK = 12 */
 331         {rfs4_op_lock, lock_denied_free, 0},
 332 
 333         /* OP_LOCKT = 13 */
 334         {rfs4_op_lockt, lock_denied_free, 0},
 335 
 336         /* OP_LOCKU = 14 */
 337         {rfs4_op_locku, nullfree, 0},
 338 
 339         /* OP_LOOKUP = 15 */
 340         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 341 
 342         /* OP_LOOKUPP = 16 */
 343         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 344 
 345         /* OP_NVERIFY = 17 */
 346         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 347 
 348         /* OP_OPEN = 18 */
 349         {rfs4_op_open, rfs4_free_reply, 0},
 350 
 351         /* OP_OPENATTR = 19 */
 352         {rfs4_op_openattr, nullfree, 0},
 353 
 354         /* OP_OPEN_CONFIRM = 20 */
 355         {rfs4_op_open_confirm, nullfree, 0},
 356 
 357         /* OP_OPEN_DOWNGRADE = 21 */
 358         {rfs4_op_open_downgrade, nullfree, 0},
 359 
 360         /* OP_OPEN_PUTFH = 22 */
 361         {rfs4_op_putfh, nullfree, RPC_ALL},
 362 
 363         /* OP_PUTPUBFH = 23 */
 364         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 365 
 366         /* OP_PUTROOTFH = 24 */
 367         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 368 
 369         /* OP_READ = 25 */
 370         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 371 
 372         /* OP_READDIR = 26 */
 373         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 374 
 375         /* OP_READLINK = 27 */
 376         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 377 
 378         /* OP_REMOVE = 28 */
 379         {rfs4_op_remove, nullfree, 0},
 380 
 381         /* OP_RENAME = 29 */
 382         {rfs4_op_rename, nullfree, 0},
 383 
 384         /* OP_RENEW = 30 */
 385         {rfs4_op_renew, nullfree, 0},
 386 
 387         /* OP_RESTOREFH = 31 */
 388         {rfs4_op_restorefh, nullfree, RPC_ALL},
 389 
 390         /* OP_SAVEFH = 32 */
 391         {rfs4_op_savefh, nullfree, RPC_ALL},
 392 
 393         /* OP_SECINFO = 33 */
 394         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 395 
 396         /* OP_SETATTR = 34 */
 397         {rfs4_op_setattr, nullfree, 0},
 398 
 399         /* OP_SETCLIENTID = 35 */
 400         {rfs4_op_setclientid, nullfree, 0},
 401 
 402         /* OP_SETCLIENTID_CONFIRM = 36 */
 403         {rfs4_op_setclientid_confirm, nullfree, 0},
 404 
 405         /* OP_VERIFY = 37 */
 406         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 407 
 408         /* OP_WRITE = 38 */
 409         {rfs4_op_write, nullfree, 0},
 410 
 411         /* OP_RELEASE_LOCKOWNER = 39 */
 412         {rfs4_op_release_lockowner, nullfree, 0},
 413 };
 414 
 415 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 416 
 417 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 418 
 419 #ifdef DEBUG
 420 
 421 int             rfs4_fillone_debug = 0;
 422 int             rfs4_no_stub_access = 1;
 423 int             rfs4_rddir_debug = 0;
 424 
 425 static char    *rfs4_op_string[] = {
 426         "rfs4_op_null",
 427         "rfs4_op_1 unused",
 428         "rfs4_op_2 unused",
 429         "rfs4_op_access",
 430         "rfs4_op_close",
 431         "rfs4_op_commit",
 432         "rfs4_op_create",
 433         "rfs4_op_delegpurge",
 434         "rfs4_op_delegreturn",
 435         "rfs4_op_getattr",
 436         "rfs4_op_getfh",
 437         "rfs4_op_link",
 438         "rfs4_op_lock",
 439         "rfs4_op_lockt",
 440         "rfs4_op_locku",
 441         "rfs4_op_lookup",
 442         "rfs4_op_lookupp",
 443         "rfs4_op_nverify",
 444         "rfs4_op_open",
 445         "rfs4_op_openattr",
 446         "rfs4_op_open_confirm",
 447         "rfs4_op_open_downgrade",
 448         "rfs4_op_putfh",
 449         "rfs4_op_putpubfh",
 450         "rfs4_op_putrootfh",
 451         "rfs4_op_read",
 452         "rfs4_op_readdir",
 453         "rfs4_op_readlink",
 454         "rfs4_op_remove",
 455         "rfs4_op_rename",
 456         "rfs4_op_renew",
 457         "rfs4_op_restorefh",
 458         "rfs4_op_savefh",
 459         "rfs4_op_secinfo",
 460         "rfs4_op_setattr",
 461         "rfs4_op_setclientid",
 462         "rfs4_op_setclient_confirm",
 463         "rfs4_op_verify",
 464         "rfs4_op_write",
 465         "rfs4_op_release_lockowner",
 466         "rfs4_op_illegal"
 467 };
 468 #endif
 469 
 470 void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 471 
 472 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 473 
 474 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 475 
 476 #ifdef  nextdp
 477 #undef nextdp
 478 #endif
 479 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 480 
 481 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 482         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 483         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 484         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 485         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 486         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 487         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 488         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 489         NULL,                   NULL
 490 };
 491 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 492         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 493         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 494         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 495         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 496         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 497         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 498         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 499         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 500         NULL,                   NULL
 501 };
 502 
 503 nfs4_srv_t *
 504 nfs4_get_srv(void)
 505 {
 506         nfs_globals_t *ng = nfs_srv_getzg();
 507         nfs4_srv_t *srv = ng->nfs4_srv;
 508         ASSERT(srv != NULL);
 509         return (srv);
 510 }
 511 
 512 void
 513 rfs4_srv_zone_init(nfs_globals_t *ng)
 514 {
 515         nfs4_srv_t *nsrv4;
 516         timespec32_t verf;
 517 
 518         nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 519 
 520         /*
 521          * The following algorithm attempts to find a unique verifier
 522          * to be used as the write verifier returned from the server
 523          * to the client.  It is important that this verifier change
 524          * whenever the server reboots.  Of secondary importance, it
 525          * is important for the verifier to be unique between two
 526          * different servers.
 527          *
 528          * Thus, an attempt is made to use the system hostid and the
 529          * current time in seconds when the nfssrv kernel module is
 530          * loaded.  It is assumed that an NFS server will not be able
 531          * to boot and then to reboot in less than a second.  If the
 532          * hostid has not been set, then the current high resolution
 533          * time is used.  This will ensure different verifiers each
 534          * time the server reboots and minimize the chances that two
 535          * different servers will have the same verifier.
 536          * XXX - this is broken on LP64 kernels.
 537          */
 538         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 539         if (verf.tv_sec != 0) {
 540                 verf.tv_nsec = gethrestime_sec();
 541         } else {
 542                 timespec_t tverf;
 543 
 544                 gethrestime(&tverf);
 545                 verf.tv_sec = (time_t)tverf.tv_sec;
 546                 verf.tv_nsec = tverf.tv_nsec;
 547         }
 548         nsrv4->write4verf = *(uint64_t *)&verf;
 549 
 550         /* Used to manage create/destroy of server state */
 551         nsrv4->nfs4_server_state = NULL;
 552         nsrv4->nfs4_cur_servinst = NULL;
 553         nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 554         mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 555         mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 556         mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 557         rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 558 
 559         ng->nfs4_srv = nsrv4;
 560 }
 561 
 562 void
 563 rfs4_srv_zone_fini(nfs_globals_t *ng)
 564 {
 565         nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 566 
 567         ng->nfs4_srv = NULL;
 568 
 569         mutex_destroy(&nsrv4->deleg_lock);
 570         mutex_destroy(&nsrv4->state_lock);
 571         mutex_destroy(&nsrv4->servinst_lock);
 572         rw_destroy(&nsrv4->deleg_policy_lock);
 573 
 574         kmem_free(nsrv4, sizeof (*nsrv4));
 575 }
 576 
 577 void
 578 rfs4_srvrinit(void)
 579 {
 580         extern void rfs4_attr_init();
 581 
 582         rfs4_attr_init();
 583 
 584         if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 585                 rfs4_disable_delegation();
 586         } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 587             &deleg_wrops) != 0) {
 588                 rfs4_disable_delegation();
 589                 fem_free(deleg_rdops);
 590         }
 591 
 592         nfs4_srv_caller_id = fs_new_caller_id();
 593         lockt_sysid = lm_alloc_sysidt();
 594         vsd_create(&nfs4_srv_vkey, NULL);
 595         rfs4_state_g_init();
 596 }
 597 
 598 void
 599 rfs4_srvrfini(void)
 600 {
 601         if (lockt_sysid != LM_NOSYSID) {
 602                 lm_free_sysidt(lockt_sysid);
 603                 lockt_sysid = LM_NOSYSID;
 604         }
 605 
 606         rfs4_state_g_fini();
 607 
 608         fem_free(deleg_rdops);
 609         fem_free(deleg_wrops);
 610 }
 611 
 612 void
 613 rfs4_do_server_start(int server_upordown,
 614     int srv_delegation, int cluster_booted)
 615 {
 616         nfs4_srv_t *nsrv4 = nfs4_get_srv();
 617 
 618         /* Is this a warm start? */
 619         if (server_upordown == NFS_SERVER_QUIESCED) {
 620                 cmn_err(CE_NOTE, "nfs4_srv: "
 621                     "server was previously quiesced; "
 622                     "existing NFSv4 state will be re-used");
 623 
 624                 /*
 625                  * HA-NFSv4: this is also the signal
 626                  * that a Resource Group failover has
 627                  * occurred.
 628                  */
 629                 if (cluster_booted)
 630                         hanfsv4_failover(nsrv4);
 631         } else {
 632                 /* Cold start */
 633                 nsrv4->rfs4_start_time = 0;
 634                 rfs4_state_zone_init(nsrv4);
 635                 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 636                     nfs4_drc_hash);
 637 
 638                 /*
 639                  * The nfsd service was started with the -s option
 640                  * we need to pull in any state from the paths indicated.
 641                  */
 642                 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 643                         /* read in the stable storage state from these paths */
 644                         rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 645                             rfs4_dss_newpaths);
 646                 }
 647         }
 648 
 649         /* Check if delegation is to be enabled */
 650         if (srv_delegation != FALSE)
 651                 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 652 }
 653 
 654 void
 655 rfs4_init_compound_state(struct compound_state *cs)
 656 {
 657         bzero(cs, sizeof (*cs));
 658         cs->cont = TRUE;
 659         cs->access = CS_ACCESS_DENIED;
 660         cs->deleg = FALSE;
 661         cs->mandlock = FALSE;
 662         cs->fh.nfs_fh4_val = cs->fhbuf;
 663 }
 664 
 665 void
 666 rfs4_grace_start(rfs4_servinst_t *sip)
 667 {
 668         rw_enter(&sip->rwlock, RW_WRITER);
 669         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 670         sip->grace_period = rfs4_grace_period;
 671         rw_exit(&sip->rwlock);
 672 }
 673 
 674 /*
 675  * returns true if the instance's grace period has never been started
 676  */
 677 int
 678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 679 {
 680         time_t start_time;
 681 
 682         rw_enter(&sip->rwlock, RW_READER);
 683         start_time = sip->start_time;
 684         rw_exit(&sip->rwlock);
 685 
 686         return (start_time == 0);
 687 }
 688 
 689 /*
 690  * Indicates if server instance is within the
 691  * grace period.
 692  */
 693 int
 694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 695 {
 696         time_t grace_expiry;
 697 
 698         rw_enter(&sip->rwlock, RW_READER);
 699         grace_expiry = sip->start_time + sip->grace_period;
 700         rw_exit(&sip->rwlock);
 701 
 702         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 703 }
 704 
 705 int
 706 rfs4_clnt_in_grace(rfs4_client_t *cp)
 707 {
 708         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 709 
 710         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 711 }
 712 
 713 /*
 714  * reset all currently active grace periods
 715  */
 716 void
 717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 718 {
 719         rfs4_servinst_t *sip;
 720 
 721         mutex_enter(&nsrv4->servinst_lock);
 722         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 723                 if (rfs4_servinst_in_grace(sip))
 724                         rfs4_grace_start(sip);
 725         mutex_exit(&nsrv4->servinst_lock);
 726 }
 727 
 728 /*
 729  * start any new instances' grace periods
 730  */
 731 void
 732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 733 {
 734         rfs4_servinst_t *sip;
 735 
 736         mutex_enter(&nsrv4->servinst_lock);
 737         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 738                 if (rfs4_servinst_grace_new(sip))
 739                         rfs4_grace_start(sip);
 740         mutex_exit(&nsrv4->servinst_lock);
 741 }
 742 
 743 static rfs4_dss_path_t *
 744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 745     char *path, unsigned index)
 746 {
 747         size_t len;
 748         rfs4_dss_path_t *dss_path;
 749 
 750         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 751 
 752         /*
 753          * Take a copy of the string, since the original may be overwritten.
 754          * Sadly, no strdup() in the kernel.
 755          */
 756         /* allow for NUL */
 757         len = strlen(path) + 1;
 758         dss_path->path = kmem_alloc(len, KM_SLEEP);
 759         (void) strlcpy(dss_path->path, path, len);
 760 
 761         /* associate with servinst */
 762         dss_path->sip = sip;
 763         dss_path->index = index;
 764 
 765         /*
 766          * Add to list of served paths.
 767          * No locking required, as we're only ever called at startup.
 768          */
 769         if (nsrv4->dss_pathlist == NULL) {
 770                 /* this is the first dss_path_t */
 771 
 772                 /* needed for insque/remque */
 773                 dss_path->next = dss_path->prev = dss_path;
 774 
 775                 nsrv4->dss_pathlist = dss_path;
 776         } else {
 777                 insque(dss_path, nsrv4->dss_pathlist);
 778         }
 779 
 780         return (dss_path);
 781 }
 782 
 783 /*
 784  * Create a new server instance, and make it the currently active instance.
 785  * Note that starting the grace period too early will reduce the clients'
 786  * recovery window.
 787  */
 788 void
 789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 790     int dss_npaths, char **dss_paths)
 791 {
 792         unsigned i;
 793         rfs4_servinst_t *sip;
 794         rfs4_oldstate_t *oldstate;
 795 
 796         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 797         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 798 
 799         sip->start_time = (time_t)0;
 800         sip->grace_period = (time_t)0;
 801         sip->next = NULL;
 802         sip->prev = NULL;
 803 
 804         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 805         /*
 806          * This initial dummy entry is required to setup for insque/remque.
 807          * It must be skipped over whenever the list is traversed.
 808          */
 809         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 810         /* insque/remque require initial list entry to be self-terminated */
 811         oldstate->next = oldstate;
 812         oldstate->prev = oldstate;
 813         sip->oldstate = oldstate;
 814 
 815 
 816         sip->dss_npaths = dss_npaths;
 817         sip->dss_paths = kmem_alloc(dss_npaths *
 818             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 819 
 820         for (i = 0; i < dss_npaths; i++) {
 821                 sip->dss_paths[i] =
 822                     rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 823         }
 824 
 825         mutex_enter(&nsrv4->servinst_lock);
 826         if (nsrv4->nfs4_cur_servinst != NULL) {
 827                 /* add to linked list */
 828                 sip->prev = nsrv4->nfs4_cur_servinst;
 829                 nsrv4->nfs4_cur_servinst->next = sip;
 830         }
 831         if (start_grace)
 832                 rfs4_grace_start(sip);
 833         /* make the new instance "current" */
 834         nsrv4->nfs4_cur_servinst = sip;
 835 
 836         mutex_exit(&nsrv4->servinst_lock);
 837 }
 838 
 839 /*
 840  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 841  * all instances directly.
 842  */
 843 void
 844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 845 {
 846         rfs4_servinst_t *sip, *prev, *current;
 847 #ifdef DEBUG
 848         int n = 0;
 849 #endif
 850 
 851         mutex_enter(&nsrv4->servinst_lock);
 852         ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 853         current = nsrv4->nfs4_cur_servinst;
 854         nsrv4->nfs4_cur_servinst = NULL;
 855         for (sip = current; sip != NULL; sip = prev) {
 856                 prev = sip->prev;
 857                 rw_destroy(&sip->rwlock);
 858                 if (sip->oldstate)
 859                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 860                 if (sip->dss_paths) {
 861                         int i = sip->dss_npaths;
 862 
 863                         while (i > 0) {
 864                                 i--;
 865                                 if (sip->dss_paths[i] != NULL) {
 866                                         char *path = sip->dss_paths[i]->path;
 867 
 868                                         if (path != NULL) {
 869                                                 kmem_free(path,
 870                                                     strlen(path) + 1);
 871                                         }
 872                                         kmem_free(sip->dss_paths[i],
 873                                             sizeof (rfs4_dss_path_t));
 874                                 }
 875                         }
 876                         kmem_free(sip->dss_paths,
 877                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 878                 }
 879                 kmem_free(sip, sizeof (rfs4_servinst_t));
 880 #ifdef DEBUG
 881                 n++;
 882 #endif
 883         }
 884         mutex_exit(&nsrv4->servinst_lock);
 885 }
 886 
 887 /*
 888  * Assign the current server instance to a client_t.
 889  * Should be called with cp->rc_dbe held.
 890  */
 891 void
 892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 893     rfs4_servinst_t *sip)
 894 {
 895         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 896 
 897         /*
 898          * The lock ensures that if the current instance is in the process
 899          * of changing, we will see the new one.
 900          */
 901         mutex_enter(&nsrv4->servinst_lock);
 902         cp->rc_server_instance = sip;
 903         mutex_exit(&nsrv4->servinst_lock);
 904 }
 905 
 906 rfs4_servinst_t *
 907 rfs4_servinst(rfs4_client_t *cp)
 908 {
 909         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 910 
 911         return (cp->rc_server_instance);
 912 }
 913 
 914 /* ARGSUSED */
 915 static void
 916 nullfree(caddr_t resop)
 917 {
 918 }
 919 
 920 /*
 921  * This is a fall-through for invalid or not implemented (yet) ops
 922  */
 923 /* ARGSUSED */
 924 static void
 925 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 926     struct compound_state *cs)
 927 {
 928         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 929 }
 930 
 931 /*
 932  * Check if the security flavor, nfsnum, is in the flavor_list.
 933  */
 934 bool_t
 935 in_flavor_list(int nfsnum, int *flavor_list, int count)
 936 {
 937         int i;
 938 
 939         for (i = 0; i < count; i++) {
 940                 if (nfsnum == flavor_list[i])
 941                         return (TRUE);
 942         }
 943         return (FALSE);
 944 }
 945 
/*
 * Used by rfs4_op_secinfo to get the security information from the
 * export structure associated with the component.
 *
 * Looks up "nm" in the current directory (cs->vp), works out which
 * exportinfo governs the resulting vnode, and fills "resp" with an array
 * of secinfo4 entries built from that export's security list:
 *
 *   - for a pseudo export, every flavor in the export is returned;
 *   - for a real export, only the flavors for which
 *     nfsauth4_secinfo_access() reports neither NFSAUTH_DENIED nor
 *     NFSAUTH_WRONGSEC are returned.
 *
 * The result array, and the OID buffer of each RPCSEC_GSS entry, are
 * allocated with kmem_alloc(); rfs4_op_secinfo_free() releases them.
 *
 * Returns NFS4_OK on success.  Otherwise returns the puterrno4()
 * translation of: ENOENT (".." at the zone root, or the object is not
 * visible), EACCES (no export found for the object and no mountpoint
 * was traversed), or whatever error VOP_LOOKUP, traverse or
 * vop_fid_pseudo returned.  The hold on the looked-up vnode is dropped on
 * every path that returns after the lookup succeeded.
 *
 * The caller must hold ne->exported_lock (asserted below).
 */
/* ARGSUSED */
static nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
        int error, different_export = 0;
        vnode_t *dvp, *vp;
        struct exportinfo *exi;
        fid_t fid;
        uint_t count, i;
        secinfo4 *resok_val;
        struct secinfo *secp;
        seconfig_t *si;
        bool_t did_traverse = FALSE;
        /* "walk" is only an out-parameter for nfs_vptoexi; never read here */
        int dotdot, walk;
        nfs_export_t *ne = nfs_get_export();

        dvp = cs->vp;
        exi = cs->exi;
        ASSERT(exi != NULL);
        dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

        /*
         * If dotdotting, then need to check whether it's above the
         * root of a filesystem, or above an export point.
         */
        if (dotdot) {
                vnode_t *zone_rootvp = ne->exi_root->exi_vp;

                ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
                /*
                 * If dotdotting at the root of a filesystem, then
                 * need to traverse back to the mounted-on filesystem
                 * and do the dotdot lookup there.
                 */
                if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {

                        /*
                         * If at the system root, then can
                         * go up no further.
                         */
                        if (VN_CMP(dvp, zone_rootvp))
                                return (puterrno4(ENOENT));

                        /*
                         * Traverse back to the mounted-on filesystem
                         */
                        dvp = untraverse(dvp, zone_rootvp);

                        /*
                         * Set the different_export flag so we remember
                         * to pick up a new exportinfo entry for
                         * this new filesystem.
                         */
                        different_export = 1;
                } else {

                        /*
                         * If dotdotting above an export point then set
                         * the different_export to get new export info.
                         */
                        different_export = nfs_exported(exi, dvp);
                }
        }

        /*
         * Get the vnode for the component "nm".
         * On success vp is held; every exit below must VN_RELE it.
         */
        error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
            NULL, NULL, NULL);
        if (error)
                return (puterrno4(error));

        /*
         * If the vnode is in a pseudo filesystem, or if the security flavor
         * used in the request is valid but not an explicitly shared flavor,
         * or the access bit indicates that this is a limited access,
         * check whether this vnode is visible.
         * nfs_visible() may also set different_export.
         */
        if (!different_export &&
            (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
            cs->access & CS_ACCESS_LIMITED)) {
                if (! nfs_visible(exi, vp, &different_export)) {
                        VN_RELE(vp);
                        return (puterrno4(ENOENT));
                }
        }

        /*
         * If it's a mountpoint, then traverse it.
         */
        if (vn_ismntpt(vp)) {
                if ((error = traverse(&vp)) != 0) {
                        VN_RELE(vp);
                        return (puterrno4(error));
                }
                /* remember that we had to traverse mountpoint */
                did_traverse = TRUE;
                different_export = 1;
        } else if (vp->v_vfsp != dvp->v_vfsp) {
                /*
                 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
                 * then vp is probably an LOFS object.  We don't need the
                 * realvp, we just need to know that we might have crossed
                 * a server fs boundary and need to call checkexport4.
                 * (LOFS lookup hides server fs mountpoints, and actually calls
                 * traverse)
                 */
                different_export = 1;
        }

        /*
         * Get the export information for it.
         */
        if (different_export) {

                bzero(&fid, sizeof (fid));
                fid.fid_len = MAXFIDSZ;
                error = vop_fid_pseudo(vp, &fid);
                if (error) {
                        VN_RELE(vp);
                        return (puterrno4(error));
                }

                /*
                 * We'll need to reassign "exi".
                 * For ".." the export is found via nfs_vptoexi(); for any
                 * other name it is looked up by fsid and fid.
                 */
                if (dotdot)
                        exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
                else
                        exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

                if (exi == NULL) {
                        if (did_traverse == TRUE) {
                                /*
                                 * If this vnode is a mounted-on vnode,
                                 * but the mounted-on file system is not
                                 * exported, send back the secinfo for
                                 * the exported node that the mounted-on
                                 * vnode lives in.
                                 */
                                exi = cs->exi;
                        } else {
                                VN_RELE(vp);
                                return (puterrno4(EACCES));
                        }
                }
        }
        ASSERT(exi != NULL);


        /*
         * Create the secinfo result based on the security information
         * from the exportinfo structure (exi).
         *
         * Return all flavors for a pseudo node.
         * For a real export node, return the flavor that the client
         * has access with.
         */
        ASSERT(RW_LOCK_HELD(&ne->exported_lock));
        if (PSEUDO(exi)) {
                count = exi->exi_export.ex_seccnt; /* total sec count */
                resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
                secp = exi->exi_export.ex_secinfo;

                for (i = 0; i < count; i++) {
                        si = &secp[i].s_secinfo;
                        resok_val[i].flavor = si->sc_rpcnum;
                        /* only RPCSEC_GSS entries carry flavor_info */
                        if (resok_val[i].flavor == RPCSEC_GSS) {
                                rpcsec_gss_info *info;

                                info = &resok_val[i].flavor_info;
                                info->qop = si->sc_qop;
                                info->service = (rpc_gss_svc_t)si->sc_service;

                                /* get oid opaque data */
                                info->oid.sec_oid4_len =
                                    si->sc_gss_mech_type->length;
                                info->oid.sec_oid4_val = kmem_alloc(
                                    si->sc_gss_mech_type->length, KM_SLEEP);
                                bcopy(
                                    si->sc_gss_mech_type->elements,
                                    info->oid.sec_oid4_val,
                                    info->oid.sec_oid4_len);
                        }
                }
                resp->SECINFO4resok_len = count;
                resp->SECINFO4resok_val = resok_val;
        } else {
                /*
                 * ret_cnt: number of flavors the client may use;
                 * k: next free slot in resok_val.
                 */
                int ret_cnt = 0, k = 0;
                int *flavor_list;

                count = exi->exi_export.ex_seccnt; /* total sec count */
                secp = exi->exi_export.ex_secinfo;

                /* scratch list, sized for the worst case of all flavors */
                flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
                /* find out which flavors to return */
                for (i = 0; i < count; i ++) {
                        int access, flavor, perm;

                        flavor = secp[i].s_secinfo.sc_nfsnum;
                        perm = secp[i].s_flags;

                        access = nfsauth4_secinfo_access(exi, cs->req,
                            flavor, perm, cs->basecr);

                        if (! (access & NFSAUTH_DENIED) &&
                            ! (access & NFSAUTH_WRONGSEC)) {
                                flavor_list[ret_cnt] = flavor;
                                ret_cnt++;
                        }
                }

                /* Create the returning SECINFO value */
                resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);

                for (i = 0; i < count; i++) {
                        /*
                         * If the flavor is in the flavor list,
                         * fill in resok_val.
                         */
                        si = &secp[i].s_secinfo;
                        if (in_flavor_list(si->sc_nfsnum,
                            flavor_list, ret_cnt)) {
                                resok_val[k].flavor = si->sc_rpcnum;
                                if (resok_val[k].flavor == RPCSEC_GSS) {
                                        rpcsec_gss_info *info;

                                        info = &resok_val[k].flavor_info;
                                        info->qop = si->sc_qop;
                                        info->service = (rpc_gss_svc_t)
                                            si->sc_service;

                                        /* get oid opaque data */
                                        info->oid.sec_oid4_len =
                                            si->sc_gss_mech_type->length;
                                        info->oid.sec_oid4_val = kmem_alloc(
                                            si->sc_gss_mech_type->length,
                                            KM_SLEEP);
                                        bcopy(si->sc_gss_mech_type->elements,
                                            info->oid.sec_oid4_val,
                                            info->oid.sec_oid4_len);
                                }
                                k++;
                        }
                        /*
                         * resok_val holds only ret_cnt entries; stop once
                         * it is full so a further match cannot write past
                         * the end.
                         */
                        if (k >= ret_cnt)
                                break;
                }
                resp->SECINFO4resok_len = ret_cnt;
                resp->SECINFO4resok_val = resok_val;
                kmem_free(flavor_list, count * sizeof (int));
        }

        VN_RELE(vp);
        return (NFS4_OK);
}
1203 
1204 /*
1205  * SECINFO (Operation 33): Obtain required security information on
1206  * the component name in the format of (security-mechanism-oid, qop, service)
1207  * triplets.
1208  */
1209 /* ARGSUSED */
1210 static void
1211 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1212     struct compound_state *cs)
1213 {
1214         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1215         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1216         utf8string *utfnm = &args->name;
1217         uint_t len;
1218         char *nm;
1219         struct sockaddr *ca;
1220         char *name = NULL;
1221         nfsstat4 status = NFS4_OK;
1222 
1223         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1224             SECINFO4args *, args);
1225 
1226         /*
1227          * Current file handle (cfh) should have been set before getting
1228          * into this function. If not, return error.
1229          */
1230         if (cs->vp == NULL) {
1231                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1232                 goto out;
1233         }
1234 
1235         if (cs->vp->v_type != VDIR) {
1236                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1237                 goto out;
1238         }
1239 
1240         /*
1241          * Verify the component name. If failed, error out, but
1242          * do not error out if the component name is a "..".
1243          * SECINFO will return its parents secinfo data for SECINFO "..".
1244          */
1245         status = utf8_dir_verify(utfnm);
1246         if (status != NFS4_OK) {
1247                 if (utfnm->utf8string_len != 2 ||
1248                     utfnm->utf8string_val[0] != '.' ||
1249                     utfnm->utf8string_val[1] != '.') {
1250                         *cs->statusp = resp->status = status;
1251                         goto out;
1252                 }
1253         }
1254 
1255         nm = utf8_to_str(utfnm, &len, NULL);
1256         if (nm == NULL) {
1257                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1258                 goto out;
1259         }
1260 
1261         if (len > MAXNAMELEN) {
1262                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1263                 kmem_free(nm, len);
1264                 goto out;
1265         }
1266 
1267         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1268         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1269             MAXPATHLEN  + 1);
1270 
1271         if (name == NULL) {
1272                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1273                 kmem_free(nm, len);
1274                 goto out;
1275         }
1276 
1277 
1278         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1279 
1280         if (name != nm)
1281                 kmem_free(name, MAXPATHLEN + 1);
1282         kmem_free(nm, len);
1283 
1284 out:
1285         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1286             SECINFO4res *, resp);
1287 }
1288 
1289 /*
1290  * Free SECINFO result.
1291  */
1292 /* ARGSUSED */
1293 static void
1294 rfs4_op_secinfo_free(nfs_resop4 *resop)
1295 {
1296         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1297         int count, i;
1298         secinfo4 *resok_val;
1299 
1300         /* If this is not an Ok result, nothing to free. */
1301         if (resp->status != NFS4_OK) {
1302                 return;
1303         }
1304 
1305         count = resp->SECINFO4resok_len;
1306         resok_val = resp->SECINFO4resok_val;
1307 
1308         for (i = 0; i < count; i++) {
1309                 if (resok_val[i].flavor == RPCSEC_GSS) {
1310                         rpcsec_gss_info *info;
1311 
1312                         info = &resok_val[i].flavor_info;
1313                         kmem_free(info->oid.sec_oid4_val,
1314                             info->oid.sec_oid4_len);
1315                 }
1316         }
1317         kmem_free(resok_val, count * sizeof (secinfo4));
1318         resp->SECINFO4resok_len = 0;
1319         resp->SECINFO4resok_val = NULL;
1320 }
1321 
/*
 * Handler for the ACCESS operation.
 *
 * For each access bit requested in args->access, checks the current
 * filehandle (cs->vp) with VOP_ACCESS using the request's credential and
 * reports the outcome in resp->access (bits granted) and resp->supported
 * (bits that were evaluated).
 *
 * Additional constraints applied on top of VOP_ACCESS:
 *   - READ, MODIFY/EXTEND and EXECUTE are not granted when
 *     MANDLOCK(vp, mode) is true.
 *   - MODIFY/EXTEND and DELETE are not evaluated at all for regular files
 *     and directories when rdonly4() reports a read-only export.
 *   - On a labeled system, unless the client's label equals admin_low,
 *     READ/LOOKUP/EXECUTE require bldominates(clabel, slabel) and
 *     MODIFY/EXTEND/DELETE require blequal(clabel, slabel).
 *
 * Fails with NFS4ERR_NOFILEHANDLE when there is no current filehandle,
 * with the translated VOP_GETATTR error, or with puterrno4(EACCES) when
 * the file's label cannot be obtained.  Otherwise the status is NFS4_OK.
 */
/* ARGSUSED */
static void
rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
        ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
        ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
        int error;
        vnode_t *vp;
        struct vattr va;
        int checkwriteperm;
        cred_t *cr = cs->cr;
        /*
         * The label locals below are assigned only when is_system_labeled()
         * is true; every later use is guarded by the same test.
         */
        bslabel_t *clabel, *slabel;
        ts_label_t *tslabel;
        boolean_t admin_low_client;

        DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
            ACCESS4args *, args);

#if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
        if (cs->access == CS_ACCESS_DENIED) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }
#endif
        if (cs->vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                goto out;
        }

        ASSERT(cr != NULL);

        vp = cs->vp;

        /*
         * If the file system is exported read only, it is not appropriate
         * to check write permissions for regular files and directories.
         * Special files are interpreted by the client, so the underlying
         * permissions are sent back to the client for interpretation.
         */
        if (rdonly4(req, cs) &&
            (vp->v_type == VREG || vp->v_type == VDIR))
                checkwriteperm = 0;
        else
                checkwriteperm = 1;

        /*
         * XXX
         * We need the mode so that we can correctly determine access
         * permissions relative to a mandatory lock file.  Access to
         * mandatory lock files is denied on the server, so it might
         * as well be reflected in the access bits returned here.
         */
        va.va_mask = AT_MODE;
        error = VOP_GETATTR(vp, &va, 0, cr, NULL);
        if (error) {
                *cs->statusp = resp->status = puterrno4(error);
                goto out;
        }
        resp->access = 0;
        resp->supported = 0;

        if (is_system_labeled()) {
                ASSERT(req->rq_label != NULL);
                clabel = req->rq_label;
                DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
                    "got client label from request(1)",
                    struct svc_req *, req);
                /*
                 * The file's label is only fetched (and later released)
                 * when the client label differs from admin_low.
                 */
                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
                                *cs->statusp = resp->status = puterrno4(EACCES);
                                goto out;
                        }
                        slabel = label2bslabel(tslabel);
                        DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
                            char *, "got server label(1) for vp(2)",
                            bslabel_t *, slabel, vnode_t *, vp);

                        admin_low_client = B_FALSE;
                } else
                        admin_low_client = B_TRUE;
        }

        /* READ: needs VREAD, no mandatory lock, and label dominance. */
        if (args->access & ACCESS4_READ) {
                error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
                if (!error && !MANDLOCK(vp, va.va_mode) &&
                    (!is_system_labeled() || admin_low_client ||
                    bldominates(clabel, slabel)))
                        resp->access |= ACCESS4_READ;
                resp->supported |= ACCESS4_READ;
        }
        /* LOOKUP: directories only; needs VEXEC and label dominance. */
        if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
                error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
                if (!error && (!is_system_labeled() || admin_low_client ||
                    bldominates(clabel, slabel)))
                        resp->access |= ACCESS4_LOOKUP;
                resp->supported |= ACCESS4_LOOKUP;
        }
        /*
         * MODIFY/EXTEND: needs VWRITE, no mandatory lock, and label
         * equality.  Whichever of the two bits were requested are granted
         * together.
         */
        if (checkwriteperm &&
            (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
                error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
                if (!error && !MANDLOCK(vp, va.va_mode) &&
                    (!is_system_labeled() || admin_low_client ||
                    blequal(clabel, slabel)))
                        resp->access |=
                            (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
                /*
                 * NOTE(review): unlike the other branches, "supported" is
                 * derived here from the granted bits (resp->access) rather
                 * than from the requested ones, so a denied MODIFY/EXTEND
                 * is reported as unsupported -- confirm this is intended.
                 */
                resp->supported |=
                    resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
        }

        /* DELETE: directories only; needs VWRITE and label equality. */
        if (checkwriteperm &&
            (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
                error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
                if (!error && (!is_system_labeled() || admin_low_client ||
                    blequal(clabel, slabel)))
                        resp->access |= ACCESS4_DELETE;
                resp->supported |= ACCESS4_DELETE;
        }
        /*
         * EXECUTE: non-directories only; needs VEXEC, no mandatory lock,
         * and label dominance.
         */
        if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
                error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
                if (!error && !MANDLOCK(vp, va.va_mode) &&
                    (!is_system_labeled() || admin_low_client ||
                    bldominates(clabel, slabel)))
                        resp->access |= ACCESS4_EXECUTE;
                resp->supported |= ACCESS4_EXECUTE;
        }

        /* Drop the file label obtained from nfs_getflabel() above. */
        if (is_system_labeled() && !admin_low_client)
                label_rele(tslabel);

        *cs->statusp = resp->status = NFS4_OK;
out:
        DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
            ACCESS4res *, resp);
}
1457 
1458 /* ARGSUSED */
1459 static void
1460 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1461     struct compound_state *cs)
1462 {
1463         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1464         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1465         int error;
1466         vnode_t *vp = cs->vp;
1467         cred_t *cr = cs->cr;
1468         vattr_t va;
1469         nfs4_srv_t *nsrv4;
1470 
1471         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1472             COMMIT4args *, args);
1473 
1474         if (vp == NULL) {
1475                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1476                 goto out;
1477         }
1478         if (cs->access == CS_ACCESS_DENIED) {
1479                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1480                 goto out;
1481         }
1482 
1483         if (args->offset + args->count < args->offset) {
1484                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1485                 goto out;
1486         }
1487 
1488         va.va_mask = AT_UID;
1489         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1490 
1491         /*
1492          * If we can't get the attributes, then we can't do the
1493          * right access checking.  So, we'll fail the request.
1494          */
1495         if (error) {
1496                 *cs->statusp = resp->status = puterrno4(error);
1497                 goto out;
1498         }
1499         if (rdonly4(req, cs)) {
1500                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1501                 goto out;
1502         }
1503 
1504         if (vp->v_type != VREG) {
1505                 if (vp->v_type == VDIR)
1506                         resp->status = NFS4ERR_ISDIR;
1507                 else
1508                         resp->status = NFS4ERR_INVAL;
1509                 *cs->statusp = resp->status;
1510                 goto out;
1511         }
1512 
1513         if (crgetuid(cr) != va.va_uid &&
1514             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1515                 *cs->statusp = resp->status = puterrno4(error);
1516                 goto out;
1517         }
1518 
1519         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1520 
1521         if (error) {
1522                 *cs->statusp = resp->status = puterrno4(error);
1523                 goto out;
1524         }
1525 
1526         nsrv4 = nfs4_get_srv();
1527         *cs->statusp = resp->status = NFS4_OK;
1528         resp->writeverf = nsrv4->write4verf;
1529 out:
1530         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1531             COMMIT4res *, resp);
1532 }
1533 
1534 /*
1535  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1536  * was completed. It does the nfsv4 create for special files.
1537  */
1538 /* ARGSUSED */
1539 static vnode_t *
1540 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1541     struct compound_state *cs, vattr_t *vap, char *nm)
1542 {
1543         int error;
1544         cred_t *cr = cs->cr;
1545         vnode_t *dvp = cs->vp;
1546         vnode_t *vp = NULL;
1547         int mode;
1548         enum vcexcl excl;
1549 
1550         switch (args->type) {
1551         case NF4CHR:
1552         case NF4BLK:
1553                 if (secpolicy_sys_devices(cr) != 0) {
1554                         *cs->statusp = resp->status = NFS4ERR_PERM;
1555                         return (NULL);
1556                 }
1557                 if (args->type == NF4CHR)
1558                         vap->va_type = VCHR;
1559                 else
1560                         vap->va_type = VBLK;
1561                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1562                     args->ftype4_u.devdata.specdata2);
1563                 vap->va_mask |= AT_RDEV;
1564                 break;
1565         case NF4SOCK:
1566                 vap->va_type = VSOCK;
1567                 break;
1568         case NF4FIFO:
1569                 vap->va_type = VFIFO;
1570                 break;
1571         default:
1572                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1573                 return (NULL);
1574         }
1575 
1576         /*
1577          * Must specify the mode.
1578          */
1579         if (!(vap->va_mask & AT_MODE)) {
1580                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1581                 return (NULL);
1582         }
1583 
1584         excl = EXCL;
1585 
1586         mode = 0;
1587 
1588         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1589         if (error) {
1590                 *cs->statusp = resp->status = puterrno4(error);
1591                 return (NULL);
1592         }
1593         return (vp);
1594 }
1595 
1596 /*
1597  * Shared failure cleanup for rfs4_op_create(), for use once the ntov table
1598  * has been initialized.  Frees the converted name when it is a buffer
1599  * separate from nm, frees nm, releases the ntov table and clears the
1600  * attrset that is returned to the client.
1601  */
1602 static void
1603 rfs4_op_create_undo(char *name, char *nm, uint_t len,
1604     struct nfs4_ntov_table *ntovp, struct nfs4_svgetit_arg *sargp,
1605     CREATE4res *resp)
1606 {
1607         if (name != nm)
1608                 kmem_free(name, MAXPATHLEN + 1);
1609         kmem_free(nm, len);
1610         nfs4_ntov_table_free(ntovp, sargp);
1611         resp->attrset = 0;
1612 }
1613
1614 /*
1615  * NFSv4 CREATE: creates a non-regular object (NF4DIR, NF4LNK, NF4BLK,
1616  * NF4CHR, NF4SOCK or NF4FIFO) named args->objname in the directory that
1617  * is the current vnode, cs->vp.  Regular files are created with OPEN.
1618  *
1619  * On success the new vnode replaces cs->vp, its filehandle is stored in
1620  * cs->fh, resp->cinfo describes the change to the directory and
1621  * resp->attrset holds the createattrs bits that were applied.  Failures
1622  * are reported through both resp->status and *cs->statusp.
1623  */
1624 /* ARGSUSED */
1625 static void
1626 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1627     struct compound_state *cs)
1628 {
1629         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1630         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1631         cred_t *cr = cs->cr;
1632         vnode_t *dvp = cs->vp;          /* directory to create in */
1633         vnode_t *vp = NULL;             /* the object being created */
1634         vnode_t *realvp;
1635         struct vattr dir_before, dir_mid, dir_recheck, dir_after;
1636         struct vattr *vap;
1637         struct nfs4_svgetit_arg sarg;
1638         struct nfs4_ntov_table ntov;
1639         struct statvfs64 sb;
1640         struct sockaddr *ca;
1641         nfsstat4 status;
1642         char *nm, *lnm;
1643         char *name = NULL;
1644         char *lname = NULL;
1645         uint_t len, llen;
1646         int syncval = 0;
1647         int error;
1648
1649         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1650             CREATE4args *, args);
1651
1652         resp->attrset = 0;
1653
1654         /* CREATE needs a current filehandle. */
1655         if (dvp == NULL) {
1656                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1657                 goto out;
1658         }
1659
1660         /*
1661          * Nothing may be created in a directory that has an unshared
1662          * filesystem mounted on it.
1663          */
1664         if (vn_ismntpt(dvp)) {
1665                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1666                 goto out;
1667         }
1668
1669         /* Only the non-regular object types are accepted here. */
1670         if (args->type != NF4LNK && args->type != NF4BLK &&
1671             args->type != NF4CHR && args->type != NF4SOCK &&
1672             args->type != NF4FIFO && args->type != NF4DIR) {
1673                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1674                 goto out;
1675         }
1676
1677         if (cs->access == CS_ACCESS_DENIED) {
1678                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1679                 goto out;
1680         }
1681
1682         if (dvp->v_type != VDIR) {
1683                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1684                 goto out;
1685         }
1686
1687         status = utf8_dir_verify(&args->objname);
1688         if (status != NFS4_OK) {
1689                 *cs->statusp = resp->status = status;
1690                 goto out;
1691         }
1692
1693         if (rdonly4(req, cs)) {
1694                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1695                 goto out;
1696         }
1697
1698         /* Decode the name of the object that is to be created. */
1699         nm = utf8_to_fn(&args->objname, &len, NULL);
1700         if (nm == NULL) {
1701                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1702                 goto out;
1703         }
1704         if (len > MAXNAMELEN) {
1705                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1706                 kmem_free(nm, len);
1707                 goto out;
1708         }
1709
1710         /*
1711          * Apply the inbound name conversion for this caller and export.
1712          * The result is either nm itself or a separate buffer of
1713          * MAXPATHLEN + 1 bytes, which is why the two are compared before
1714          * name is freed.
1715          */
1716         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1717         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1718             MAXPATHLEN + 1);
1719         if (name == NULL) {
1720                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1721                 kmem_free(nm, len);
1722                 goto out;
1723         }
1724
1725         resp->attrset = 0;
1726
1727         /* Convert createattrs into the vattr kept in sarg. */
1728         sarg.sbp = &sb;
1729         sarg.is_referral = B_FALSE;
1730         nfs4_ntov_table_init(&ntov);
1731         status = do_rfs4_set_attrs(&resp->attrset, &args->createattrs, cs,
1732             &sarg, &ntov, NFS4ATTR_SETIT);
1733
1734         /* A request that ends up setting no vattr bits is invalid. */
1735         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1736                 status = NFS4ERR_INVAL;
1737
1738         if (status != NFS4_OK) {
1739                 *cs->statusp = resp->status = status;
1740                 rfs4_op_create_undo(name, nm, len, &ntov, &sarg, resp);
1741                 goto out;
1742         }
1743
1744         /* Sample the directory for the "before" change value. */
1745         dir_before.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1746         error = VOP_GETATTR(dvp, &dir_before, 0, cr, NULL);
1747         if (error) {
1748                 *cs->statusp = resp->status = puterrno4(error);
1749                 rfs4_op_create_undo(name, nm, len, &ntov, &sarg, resp);
1750                 goto out;
1751         }
1752         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, dir_before.va_ctime);
1753
1754         vap = sarg.vap;
1755
1756         /*
1757          * Ownership defaults to the caller's credentials unless it was
1758          * given in createattrs or the parent directory has the matching
1759          * VSUID/VSGID bit set.
1760          */
1761         if ((vap->va_mask & AT_UID) == 0 && !(dir_before.va_mode & VSUID)) {
1762                 vap->va_uid = crgetuid(cr);
1763                 vap->va_mask |= AT_UID;
1764         }
1765         if ((vap->va_mask & AT_GID) == 0 && !(dir_before.va_mode & VSGID)) {
1766                 vap->va_gid = crgetgid(cr);
1767                 vap->va_mask |= AT_GID;
1768         }
1769
1770         vap->va_mask |= AT_TYPE;
1771
1772         /*
1773          * Default permissions when createattrs carried no mode: owner rwx
1774          * for directories and symlinks, owner rw for everything else.
1775          */
1776         if ((vap->va_mask & AT_MODE) == 0) {
1777                 if (args->type == NF4DIR || args->type == NF4LNK)
1778                         vap->va_mode = 0700;
1779                 else
1780                         vap->va_mode = 0600;
1781                 vap->va_mask |= AT_MODE;
1782         }
1783
1784         switch (args->type) {
1785         case NF4DIR:
1786                 vap->va_type = VDIR;
1787                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1788                 if (error)
1789                         break;
1790
1791                 /* First "after" sequence sample; zero when unavailable. */
1792                 dir_mid.va_mask = AT_SEQ;
1793                 if (VOP_GETATTR(dvp, &dir_mid, 0, cs->cr, NULL))
1794                         dir_mid.va_seq = 0;
1795                 break;
1796
1797         case NF4LNK:
1798                 vap->va_type = VLNK;
1799
1800                 /* The link text is treated as data rather than as a name. */
1801                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1802                     &llen, NULL);
1803                 if (lnm == NULL) {
1804                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1805                         rfs4_op_create_undo(name, nm, len, &ntov, &sarg,
1806                             resp);
1807                         goto out;
1808                 }
1809                 if (llen > MAXPATHLEN) {
1810                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1811                         kmem_free(lnm, llen);
1812                         rfs4_op_create_undo(name, nm, len, &ntov, &sarg,
1813                             resp);
1814                         goto out;
1815                 }
1816
1817                 lname = nfscmd_convname(ca, cs->exi, lnm,
1818                     NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1819                 if (lname == NULL) {
1820                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1821                         kmem_free(lnm, llen);
1822                         rfs4_op_create_undo(name, nm, len, &ntov, &sarg,
1823                             resp);
1824                         goto out;
1825                 }
1826
1827                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1828                 if (lname != lnm)
1829                         kmem_free(lname, MAXPATHLEN + 1);
1830                 kmem_free(lnm, llen);
1831                 if (error)
1832                         break;
1833
1834                 /* First "after" sequence sample; zero when unavailable. */
1835                 dir_mid.va_mask = AT_SEQ;
1836                 if (VOP_GETATTR(dvp, &dir_mid, 0, cs->cr, NULL))
1837                         dir_mid.va_seq = 0;
1838
1839                 /* VOP_SYMLINK hands back no vnode here, so look it up. */
1840                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1841                     NULL, NULL, NULL);
1842                 if (error)
1843                         break;
1844
1845                 /*
1846                  * va_seq is not safe across VOP calls.  Sample it again
1847                  * and, if it moved or cannot be read, zero the earlier
1848                  * sample so that atomic is forced to FALSE.
1849                  */
1850                 dir_recheck.va_mask = AT_SEQ;
1851                 if (VOP_GETATTR(dvp, &dir_recheck, 0, cs->cr, NULL) ||
1852                     dir_recheck.va_seq != dir_mid.va_seq)
1853                         dir_mid.va_seq = 0;
1854                 break;
1855
1856         default:
1857                 /* Anything else is presumably a special file. */
1858                 syncval = FNODSYNC;
1859
1860                 /* This generates exactly one VOP call. */
1861                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1862                 if (vp == NULL) {
1863                         rfs4_op_create_undo(name, nm, len, &ntov, &sarg,
1864                             resp);
1865                         goto out;
1866                 }
1867
1868                 /* First "after" sequence sample; zero when unavailable. */
1869                 dir_mid.va_mask = AT_SEQ;
1870                 if (VOP_GETATTR(dvp, &dir_mid, 0, cs->cr, NULL))
1871                         dir_mid.va_seq = 0;
1872                 break;
1873         }
1874
1875         if (name != nm)
1876                 kmem_free(name, MAXPATHLEN + 1);
1877         kmem_free(nm, len);
1878
1879         if (error)
1880                 *cs->statusp = resp->status = puterrno4(error);
1881
1882         /*
1883          * Force the directory's modified data and metadata out to stable
1884          * storage; this is done whether or not the create succeeded.
1885          */
1886         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1887
1888         if (resp->status != NFS4_OK) {
1889                 if (vp != NULL)
1890                         VN_RELE(vp);
1891                 nfs4_ntov_table_free(&ntov, &sarg);
1892                 resp->attrset = 0;
1893                 goto out;
1894         }
1895
1896         /*
1897          * Complete cinfo; "before" is already filled in.  When the "after"
1898          * attributes cannot be read, the before ctime is reported again.
1899          */
1900         dir_after.va_mask = AT_CTIME|AT_SEQ;
1901         if (VOP_GETATTR(dvp, &dir_after, 0, cr, NULL)) {
1902                 dir_after.va_ctime = dir_before.va_ctime;
1903                 dir_after.va_seq = 0;
1904         }
1905         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, dir_after.va_ctime);
1906
1907         /*
1908          * No attempt is made to verify that the object really carries the
1909          * requested attributes.  They may be changed right after creation,
1910          * and the only recourse on a mismatch would be to destroy the
1911          * object, which seems a poor default policy (e.g. over a time that
1912          * failed to verify).  The reply therefore simply reports the
1913          * createattrs bits that were set.
1914          *
1915          * nfs4_vmask_to_nmask() accounts for the vattr bits set on create
1916          * (do_rfs4_set_attrs() only sets reply bits for non-vattr/vfs
1917          * attributes).  Bits that were set by default are masked off so
1918          * that no more is returned than createattrs asked for.
1919          */
1920         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1921         resp->attrset &= args->createattrs.attrmask;
1922         nfs4_ntov_table_free(&ntov, &sarg);
1923
1924         error = makefh4(&cs->fh, vp, cs->exi);
1925         if (error)
1926                 *cs->statusp = resp->status = puterrno4(error);
1927
1928         /*
1929          * cinfo.atomic is TRUE only when nothing failed, all three va_seq
1930          * samples are non-zero, the sequence advanced by exactly one during
1931          * the create and it did not move again during the VOP_LOOKUP or
1932          * VOP_FSYNC.
1933          */
1934         if (!error && dir_before.va_seq && dir_mid.va_seq &&
1935             dir_after.va_seq &&
1936             dir_mid.va_seq == (dir_before.va_seq + 1) &&
1937             dir_mid.va_seq == dir_after.va_seq)
1938                 resp->cinfo.atomic = TRUE;
1939         else
1940                 resp->cinfo.atomic = FALSE;
1941
1942         /*
1943          * Force the new object's metadata out to stable storage, using
1944          * the underlying vnode when VOP_REALVP() provides one.
1945          */
1946         if (VOP_REALVP(vp, &realvp, NULL) != 0)
1947                 realvp = vp;
1948         (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1949
1950         if (resp->status != NFS4_OK) {
1951                 VN_RELE(vp);
1952                 goto out;
1953         }
1954
1955         /* The new object becomes the current vnode of the compound. */
1956         if (cs->vp)
1957                 VN_RELE(cs->vp);
1958         cs->vp = vp;
1959         *cs->statusp = resp->status = NFS4_OK;
1960 out:
1961         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1962             CREATE4res *, resp);
1963 }
1979 
1980 /*
1981  * DELEGPURGE: the request is handed unchanged to rfs4_op_inval(); this
1982  * wrapper only adds the op-specific start/done probes around that call.
1983  */
1984 /*ARGSUSED*/
1985 static void
1986 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1987     struct compound_state *cs)
1988 {
1989         DELEGPURGE4args *args = &argop->nfs_argop4_u.opdelegpurge;
1990         DELEGPURGE4res *resp = &resop->nfs_resop4_u.opdelegpurge;
1991
1992         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1993             DELEGPURGE4args *, args);
1994
1995         rfs4_op_inval(argop, resop, req, cs);
1996
1997         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1998             DELEGPURGE4res *, resp);
1999 }
1993 
1994 /*
1995  * DELEGRETURN: looks up the delegation state for args->deleg_stateid and
1996  * returns the delegation, provided the state belongs to the file of the
1997  * current vnode; otherwise NFS4ERR_BAD_STATEID is reported.  Once the
1998  * state has been found, the owning client's lease is updated in either
1999  * case and the state is released with rfs4_deleg_state_rele().
2000  */
2001 /*ARGSUSED*/
2002 static void
2003 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2004     struct compound_state *cs)
2005 {
2006         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2007         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2008         rfs4_deleg_state_t *dsp;
2009         nfsstat4 status;
2010
2011         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2012             DELEGRETURN4args *, args);
2013
2014         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2015         resp->status = *cs->statusp = status;
2016
2017         if (status == NFS4_OK) {
2018                 /* The stateid has to match the current filehandle. */
2019                 if (cs->vp == dsp->rds_finfo->rf_vp)
2020                         rfs4_return_deleg(dsp, FALSE);
2021                 else
2022                         resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2023
2024                 rfs4_update_lease(dsp->rds_client);
2025                 rfs4_deleg_state_rele(dsp);
2026         }
2027
2028         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2029             DELEGRETURN4res *, resp);
2030 }
2025 
2026 /*
2027  * Report whether "flavor" is an explicitly shared flavor of "exi".  The
2028  * caller must already know that "flavor" is a valid flavor in the secinfo
2029  * list of "exi".
2030  *
2031  * A flavor can be in that list without having been shared explicitly:
2032  *
2033  *              # share -o sec=flavor1 /export
2034  *              # share -o sec=flavor2 /export/home
2035  *
2036  * puts flavor2 into the secinfo list of /export through the server
2037  * namespace setup, yet flavor2 is not an explicitly shared flavor for
2038  * /export.
2039  *
2040  * The first entry that either matches "flavor" or is AUTH_NONE decides
2041  * the result, which is SEC_REF_EXPORTED() of that entry.  0 is returned
2042  * when no entry qualifies.
2043  */
2044 int
2045 is_exported_sec(int flavor, struct exportinfo *exi)
2046 {
2047         struct secinfo *secp = exi->exi_export.ex_secinfo;
2048         int idx;
2049
2050         for (idx = 0; idx < exi->exi_export.ex_seccnt; idx++) {
2051                 if (secp[idx].s_secinfo.sc_nfsnum != flavor &&
2052                     secp[idx].s_secinfo.sc_nfsnum != AUTH_NONE)
2053                         continue;
2054
2055                 return (SEC_REF_EXPORTED(&secp[idx]));
2056         }
2057
2058         /* Not expected to be reached, given the assumption above. */
2059         return (0);
2060 }
2056 
2057 /*
2058  * Decide whether the security flavor of the request, cs->nfsflavor, is
2059  * acceptable for the current export data in cs->exi (an export point or
2060  * the root pseudo node, exi_root).
2061  *
2062  * Returns 1 when the export's secinfo list contains that flavor or an
2063  * AUTH_NONE entry, and 0 otherwise.
2064  */
2065 static int
2066 secinfo_match_or_authnone(struct compound_state *cs)
2067 {
2068         struct exportinfo *exi = cs->exi;
2069         struct secinfo *secp = exi->exi_export.ex_secinfo;
2070         int idx = 0;
2071
2072         while (idx < exi->exi_export.ex_seccnt) {
2073                 if (secp[idx].s_secinfo.sc_nfsnum == AUTH_NONE ||
2074                     secp[idx].s_secinfo.sc_nfsnum == cs->nfsflavor)
2075                         return (1);
2076                 idx++;
2077         }
2078
2079         return (0);
2080 }
2083 
2084 /*
2085  * Check the access authority for the client and return the resulting
2086  * nfsv4 status, which is also stored in *cs->statusp:
2087  *
2088  *      NFS4ERR_WRONGSEC  the request's flavor is not among the flavors of
2089  *                        the export, or checkauth4() returned -2
2090  *      NFS4_OK           checkauth4() returned a positive value
2091  *      NFS4ERR_ACCESS    checkauth4() returned 0
2092  *      NFS4ERR_DELAY     checkauth4() returned any other negative value
2093  *
2094  * On NFS4_OK, cs->access becomes CS_ACCESS_OK unless CS_ACCESS_LIMITED
2095  * is set in it.
2096  */
2097 nfsstat4
2098 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2099 {
2100         nfsstat4 stat;
2101         int authres;
2102
2103         /*
2104          * The flavor used in the request has to be one of the flavors
2105          * set in the server namespace before anything else is checked.
2106          */
2107         if (!secinfo_match_or_authnone(cs)) {
2108                 *cs->statusp = NFS4ERR_WRONGSEC;
2109                 return (*cs->statusp);
2110         }
2111
2112         authres = checkauth4(cs, req);
2113         if (authres > 0) {
2114                 stat = NFS4_OK;
2115                 if (!(cs->access & CS_ACCESS_LIMITED))
2116                         cs->access = CS_ACCESS_OK;
2117         } else {
2118                 switch (authres) {
2119                 case 0:
2120                         stat = NFS4ERR_ACCESS;
2121                         break;
2122                 case -2:
2123                         stat = NFS4ERR_WRONGSEC;
2124                         break;
2125                 default:
2126                         stat = NFS4ERR_DELAY;
2127                         break;
2128                 }
2129         }
2130
2131         *cs->statusp = stat;
2132         return (stat);
2133 }
2116 
2117 /*
2118  * bitmap4_to_attrmask is called by getattr and readdir.  From the
2119  * requested attribute bitmap it derives the vattr mask, stored in
2120  * sargp->vap->va_mask, and decides whether a vfsstat call is needed: the
2121  * caller's sargp->sbp is kept only in that case and is set to NULL
2122  * otherwise.  The other per-request fields of sargp (flag, rdattr_error,
2123  * mntdfid_set, xattr, rdattr_error_req) are reset as well.
2124  *
2125  * Returns nfsv4 status; every path currently returns NFS4_OK.
2126  */
2127 static nfsstat4
2128 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2129 {
2130         struct statvfs64 *caller_sbp = sargp->sbp;
2131         int idx;
2132
2133         sargp->sbp = NULL;
2134         sargp->flag = 0;
2135         sargp->rdattr_error = NFS4_OK;
2136         sargp->mntdfid_set = FALSE;
2137
2138         /* The xattr flags come from the filehandle when there is a vnode. */
2139         if (sargp->cs->vp == NULL) {
2140                 sargp->xattr = 0;
2141         } else {
2142                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2143                     FH4_ATTRDIR | FH4_NAMEDATTR);
2144         }
2145
2146         /*
2147          * rdattr_error_req is true when an error is to be returned per
2148          * failed entry rather than failing the readdir.
2149          */
2150         sargp->rdattr_error_req = (breq & FATTR4_RDATTR_ERROR_MASK) != 0;
2151
2152         /*
2153          * Generate the va_mask.  The well-known request bitmaps have
2154          * precomputed masks; anything else is translated bit by bit.
2155          */
2156         if (breq == NFS4_NTOV_ATTR_MASK) {
2157                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2158         } else if (breq == NFS4_FS_ATTR_MASK) {
2159                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2160                 sargp->sbp = caller_sbp;
2161         } else if (breq == NFS4_NTOV_ATTR_CACHE_MASK) {
2162                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2163         } else if (breq == FATTR4_LEASE_TIME_MASK) {
2164                 sargp->vap->va_mask = 0;
2165         } else {
2166                 uint_t mask = 0;
2167
2168                 for (idx = 0; idx < nfs4_ntov_map_size; idx++) {
2169                         if (breq & nfs4_ntov_map[idx].fbit)
2170                                 mask |= nfs4_ntov_map[idx].vbit;
2171                 }
2172
2173                 /* Check whether vfsstat is needed. */
2174                 if (breq & NFS4_FS_ATTR_MASK)
2175                         sargp->sbp = caller_sbp;
2176
2177                 sargp->vap->va_mask = mask;
2178         }
2179
2180         return (NFS4_OK);
2181 }
2190 
2191 /*
2192  * bitmap4_get_sysattrs is called by getattr and readdir.  It issues the
2193  * VFS_STATVFS call when sargp->sbp is set and then fetches the vnode
2194  * attributes of the current vnode through rfs4_vop_getattr().
2195  *
2196  * Returns nfsv4 status.  If VFS_STATVFS fails, sargp->sbp is set to NULL
2197  * to identify the error and the vnode attributes are not fetched.
2198  */
2199 static nfsstat4
2200 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2201 {
2202         struct compound_state *cs = sargp->cs;
2203         vnode_t *vp = cs->vp;
2204         int error;
2205
2206         if (sargp->sbp != NULL) {
2207                 error = VFS_STATVFS(vp->v_vfsp, sargp->sbp);
2208                 if (error != 0) {
2209                         sargp->sbp = NULL;
2210                         return (puterrno4(error));
2211                 }
2212         }
2213
2214         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2215 }
2212 
2213 /*
2214  * Set up an ntov table for use: allocates a zeroed na array with one
2215  * union nfs4_attr_u per nfs4_ntov_map entry and resets attrcnt and
2216  * vfsstat.  The array is freed again by nfs4_ntov_table_free().
2217  */
2218 static void
2219 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2220 {
2221         size_t na_size = sizeof (union nfs4_attr_u) * nfs4_ntov_map_size;
2222
2223         ntovp->attrcnt = 0;
2224         ntovp->vfsstat = FALSE;
2225         ntovp->na = kmem_zalloc(na_size, KM_SLEEP);
2226 }
2221 
2222 /*
2223  * Release what an ntov table holds: every collected attribute (the first
2224  * attrcnt slots) is passed to its sv_getit handler with NFS4ATTR_FREEIT
2225  * and, for NFS4ATTR_SETIT and NFS4ATTR_VERIT requests, to xdr_free() as
2226  * well.  Finally the na array itself is freed.
2227  */
2228 static void
2229 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2230     struct nfs4_svgetit_arg *sargp)
2231 {
2232         int idx;
2233
2234         /*
2235          * XXX The same checks for whether the bit is set should be done
2236          * here.
2237          */
2238         for (idx = 0; idx < ntovp->attrcnt; idx++) {
2239                 (void) (*nfs4_ntov_map[ntovp->amap[idx]].sv_getit)(
2240                     NFS4ATTR_FREEIT, sargp, &ntovp->na[idx]);
2241         }
2242
2243         /* For getattr the xdr_free is done later, not here. */
2244         if (sargp->op == NFS4ATTR_SETIT || sargp->op == NFS4ATTR_VERIT) {
2245                 for (idx = 0; idx < ntovp->attrcnt; idx++) {
2246                         xdr_free(nfs4_ntov_map[ntovp->amap[idx]].xfunc,
2247                             (caddr_t)&ntovp->na[idx]);
2248                 }
2249         }
2250
2251         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2252 }
2249 
2250 /*
2251  * do_rfs4_op_getattr gets the system attrs selected by breq through the
2252  * nfs4_ntov_map handlers and converts them into the XDR-encoded fattr4
2253  * at fattrp.
2254  *
2255  * An attribute is included only when its handler reports it as supported
2256  * and getting it succeeds.  A failing get aborts the call unless
2257  * sargp->rdattr_error_req is set; then the attribute is left out and,
2258  * for a positive error, sargp->rdattr_error is set if it was still
2259  * NFS4_OK.
2260  *
2261  * Returns NFS4_OK, NFS4ERR_INVAL when a write-only attribute was asked
2262  * for, NFS4ERR_SERVERFAULT when encoding fails, or the nfsv4 status for
2263  * the failing get.
2264  */
2265 static nfsstat4
2266 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2267     struct nfs4_svgetit_arg *sargp)
2268 {
2269         struct nfs4_ntov_table ntov;
2270         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2271         nfsstat4 status = NFS4_OK;
2272         ulong_t xdr_size = 0;
2273         char *xdr_attrs;
2274         XDR xdr;
2275         int error = 0;
2276         int i, k;
2277
2278         sargp->op = NFS4ATTR_GETIT;
2279         sargp->flag = 0;
2280         fattrp->attrmask = 0;
2281
2282         /* With no bits requested, an empty fattr4 is returned. */
2283         if (breq == 0) {
2284                 fattrp->attrlist4_len = 0;
2285                 fattrp->attrlist4 = NULL;
2286                 return (NFS4_OK);
2287         }
2288
2289         /* Write-only attrs cannot be requested here. */
2290         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2291                 return (NFS4ERR_INVAL);
2292
2293         nfs4_ntov_table_init(&ntov);
2294
2295         /*
2296          * Get each requested attr that is supported.  Slot ntov.attrcnt of
2297          * ntov.na and ntov.amap is the next unused one throughout.
2298          */
2299         for (i = 0; i < nfs4_ntov_map_size; i++) {
2300                 if ((breq & nfs4_ntov_map[i].fbit) == 0)
2301                         continue;
2302                 if ((*nfs4_ntov_map[i].sv_getit)(NFS4ATTR_SUPPORTED, sargp,
2303                     NULL) != 0)
2304                         continue;
2305
2306                 error = (*nfs4_ntov_map[i].sv_getit)(NFS4ATTR_GETIT, sargp,
2307                     &ntov.na[ntov.attrcnt]);
2308
2309                 /*
2310                  * Possible error values: >0 if sv_getit failed to get the
2311                  * attr; 0 if it succeeded; <0 if rdattr_error and the
2312                  * attribute cannot be returned.
2313                  */
2314                 if (error != 0 && !sargp->rdattr_error_req)
2315                         goto done;
2316
2317                 /* From here on an error only affects this attr. */
2318                 if (error == 0) {
2319                         fattrp->attrmask |= nfs4_ntov_map[i].fbit;
2320                         ntov.amap[ntov.attrcnt] =
2321                             (uint8_t)nfs4_ntov_map[i].nval;
2322                         ntov.attrcnt++;
2323                 } else if (error > 0 && sargp->rdattr_error == NFS4_OK) {
2324                         sargp->rdattr_error = puterrno4(error);
2325                 }
2326                 error = 0;
2327         }
2328
2329         /*
2330          * If rdattr_error was set after the return value for it was
2331          * assigned, update that value.  The scan stops at the first
2332          * collected attr that is not below FATTR4_RDATTR_ERROR.
2333          */
2334         if (prev_rdattr_error != sargp->rdattr_error) {
2335                 for (i = 0; i < ntov.attrcnt; i++) {
2336                         k = ntov.amap[i];
2337                         if (k < FATTR4_RDATTR_ERROR)
2338                                 continue;
2339
2340                         if (k == FATTR4_RDATTR_ERROR &&
2341                             (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_SUPPORTED,
2342                             sargp, NULL) == 0) {
2343                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2344                                     NFS4ATTR_GETIT, sargp, &ntov.na[i]);
2345                         }
2346                         break;
2347                 }
2348         }
2349
2350         /* Size the encoded attribute list. */
2351         for (i = 0; i < ntov.attrcnt; i++) {
2352                 xdr_size += xdr_sizeof(nfs4_ntov_map[ntov.amap[i]].xfunc,
2353                     &ntov.na[i]);
2354         }
2355         fattrp->attrlist4_len = xdr_size;
2356
2357         if (xdr_size == 0) {
2358                 fattrp->attrlist4 = NULL;
2359                 goto done;
2360         }
2361
2362         /* freed by rfs4_op_getattr_free() */
2363         xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2364         fattrp->attrlist4 = xdr_attrs;
2365         xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2366
2367         for (i = 0; i < ntov.attrcnt; i++) {
2368                 k = ntov.amap[i];
2369                 if (!(*nfs4_ntov_map[k].xfunc)(&xdr, &ntov.na[i])) {
2370                         DTRACE_PROBE1(nfss__e__getattr4_encfail, int, k);
2371                         status = NFS4ERR_SERVERFAULT;
2372                         break;
2373                 }
2374         }
2375
2376 done:
2377         nfs4_ntov_table_free(&ntov, sargp);
2378
2379         if (error != 0)
2380                 status = puterrno4(error);
2381
2382         return (status);
2383 }
2388 
2389 /*
2390  * GETATTR: returns the attributes selected by args->attr_request for the
2391  * current vnode in resp->obj_attributes.  The request bitmap is turned
2392  * into a vattr mask, the system attributes are fetched, and the result
2393  * is converted by do_rfs4_op_getattr(); the first step that fails
2394  * determines the status.
2395  */
2396 /* ARGSUSED */
2397 static void
2398 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2399     struct compound_state *cs)
2400 {
2401         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2402         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2403         struct nfs4_svgetit_arg sarg;
2404         struct statvfs64 sb;
2405         nfsstat4 status;
2406
2407         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2408             GETATTR4args *, args);
2409
2410         if (cs->vp == NULL) {
2411                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2412                 goto out;
2413         }
2414         if (cs->access == CS_ACCESS_DENIED) {
2415                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2416                 goto out;
2417         }
2418
2419         sarg.cs = cs;
2420         sarg.sbp = &sb;
2421         sarg.is_referral = B_FALSE;
2422
2423         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2424         if (status == NFS4_OK)
2425                 status = bitmap4_get_sysattrs(&sarg);
2426
2427         if (status == NFS4_OK) {
2428                 /*
2429                  * A referral is reported as such, except that an older
2430                  * V4 Solaris client gets to see a link instead.
2431                  */
2432                 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2433                         if (client_is_downrev(req))
2434                                 sarg.vap->va_type = VLNK;
2435                         else
2436                                 sarg.is_referral = B_TRUE;
2437                 }
2438
2439                 status = do_rfs4_op_getattr(args->attr_request,
2440                     &resp->obj_attributes, &sarg);
2441         }
2442         *cs->statusp = resp->status = status;
2443 out:
2444         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2445             GETATTR4res *, resp);
2446 }
2441 
2442 /*
2443  * Free the attribute data that rfs4_op_getattr() stored in the GETATTR
2444  * result, using nfs4_fattr4_free().
2445  */
2446 static void
2447 rfs4_op_getattr_free(nfs_resop4 *resop)
2448 {
2449         nfs4_fattr4_free(&resop->nfs_resop4_u.opgetattr.obj_attributes);
2450 }
2449 
2450 /*
2451  * GETFH: returns a copy of the current filehandle (cs->fh) in
2452  * resp->object.  The copy is allocated here and is released by
2453  * rfs4_op_getfh_free().
2454  *
2455  * NFS4ERR_MOVED is returned instead when the share point is (or has
2456  * already been marked as) a reparse point, or when the current vnode is
2457  * a reparse point and the client is not a down-rev client.
2458  *
2459  * Every exit goes through the "out" label so that the op__getfh__done
2460  * probe fires for each op__getfh__start, including the NFS4ERR_MOVED
2461  * cases.
2462  */
2463 /* ARGSUSED */
2464 static void
2465 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2466     struct compound_state *cs)
2467 {
2468         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2469
2470         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2471
2472         if (cs->vp == NULL) {
2473                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2474                 goto out;
2475         }
2476         if (cs->access == CS_ACCESS_DENIED) {
2477                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2478                 goto out;
2479         }
2480
2481         /*
2482          * Check for a reparse point at the share point.  exi_moved is set
2483          * when one is found, so that the short-circuit test below
2484          * succeeds without looking at the vnode while the flag is set.
2485          */
2486         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2487                 cs->exi->exi_moved = 1;
2488                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2489                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2490                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2491                 goto out;
2492         }
2493
2494         /* Check for a reparse point at the current vnode. */
2495         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2496                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2497                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2498                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2499                 goto out;
2500         }
2501
2502         resp->object.nfs_fh4_val =
2503             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2504         nfs_fh4_copy(&cs->fh, &resp->object);
2505         *cs->statusp = resp->status = NFS4_OK;
2506 out:
2507         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2508             GETFH4res *, resp);
2509 }
2495 
2496 static void
2497 rfs4_op_getfh_free(nfs_resop4 *resop)
2498 {
2499         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2500 
2501         if (resp->status == NFS4_OK &&
2502             resp->object.nfs_fh4_val != NULL) {
2503                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2504                 resp->object.nfs_fh4_val = NULL;
2505                 resp->object.nfs_fh4_len = 0;
2506         }
2507 }
2508 
2509 /*
2510  * illegal: args: void
2511  *          res : status (NFS4ERR_OP_ILLEGAL)
2512  */
2513 /* ARGSUSED */
2514 static void
2515 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2516     struct svc_req *req, struct compound_state *cs)
2517 {
2518         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2519 
2520         resop->resop = OP_ILLEGAL;
2521         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2522 }
2523 
2524 /*
2525  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2526  *       res: status. If success - CURRENT_FH unchanged, return change_info
2527  */
2528 /* ARGSUSED */
2529 static void
2530 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2531     struct compound_state *cs)
2532 {
2533         LINK4args *args = &argop->nfs_argop4_u.oplink;
2534         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2535         int error;
2536         vnode_t *vp;
2537         vnode_t *dvp;
2538         struct vattr bdva, idva, adva;
2539         char *nm;
2540         uint_t  len;
2541         struct sockaddr *ca;
2542         char *name = NULL;
2543         nfsstat4 status;
2544 
2545         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2546             LINK4args *, args);
2547 
2548         /* SAVED_FH: source object */
2549         vp = cs->saved_vp;
2550         if (vp == NULL) {
2551                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2552                 goto out;
2553         }
2554 
2555         /* CURRENT_FH: target directory */
2556         dvp = cs->vp;
2557         if (dvp == NULL) {
2558                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2559                 goto out;
2560         }
2561 
2562         /*
2563          * If there is a non-shared filesystem mounted on this vnode,
2564          * do not allow to link any file in this directory.
2565          */
2566         if (vn_ismntpt(dvp)) {
2567                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2568                 goto out;
2569         }
2570 
2571         if (cs->access == CS_ACCESS_DENIED) {
2572                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2573                 goto out;
2574         }
2575 
2576         /* Check source object's type validity */
2577         if (vp->v_type == VDIR) {
2578                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2579                 goto out;
2580         }
2581 
2582         /* Check target directory's type */
2583         if (dvp->v_type != VDIR) {
2584                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2585                 goto out;
2586         }
2587 
2588         if (cs->saved_exi != cs->exi) {
2589                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2590                 goto out;
2591         }
2592 
2593         status = utf8_dir_verify(&args->newname);
2594         if (status != NFS4_OK) {
2595                 *cs->statusp = resp->status = status;
2596                 goto out;
2597         }
2598 
2599         nm = utf8_to_fn(&args->newname, &len, NULL);
2600         if (nm == NULL) {
2601                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2602                 goto out;
2603         }
2604 
2605         if (len > MAXNAMELEN) {
2606                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2607                 kmem_free(nm, len);
2608                 goto out;
2609         }
2610 
2611         if (rdonly4(req, cs)) {
2612                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2613                 kmem_free(nm, len);
2614                 goto out;
2615         }
2616 
2617         /* Get "before" change value */
2618         bdva.va_mask = AT_CTIME|AT_SEQ;
2619         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2620         if (error) {
2621                 *cs->statusp = resp->status = puterrno4(error);
2622                 kmem_free(nm, len);
2623                 goto out;
2624         }
2625 
2626         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2627         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2628             MAXPATHLEN  + 1);
2629 
2630         if (name == NULL) {
2631                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2632                 kmem_free(nm, len);
2633                 goto out;
2634         }
2635 
2636         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2637 
2638         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2639 
2640         if (nm != name)
2641                 kmem_free(name, MAXPATHLEN + 1);
2642         kmem_free(nm, len);
2643 
2644         /*
2645          * Get the initial "after" sequence number, if it fails, set to zero
2646          */
2647         idva.va_mask = AT_SEQ;
2648         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2649                 idva.va_seq = 0;
2650 
2651         /*
2652          * Force modified data and metadata out to stable storage.
2653          */
2654         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2655         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2656 
2657         if (error) {
2658                 *cs->statusp = resp->status = puterrno4(error);
2659                 goto out;
2660         }
2661 
2662         /*
2663          * Get "after" change value, if it fails, simply return the
2664          * before value.
2665          */
2666         adva.va_mask = AT_CTIME|AT_SEQ;
2667         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2668                 adva.va_ctime = bdva.va_ctime;
2669                 adva.va_seq = 0;
2670         }
2671 
2672         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2673 
2674         /*
2675          * The cinfo.atomic = TRUE only if we have
2676          * non-zero va_seq's, and it has incremented by exactly one
2677          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2678          */
2679         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2680             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2681                 resp->cinfo.atomic = TRUE;
2682         else
2683                 resp->cinfo.atomic = FALSE;
2684 
2685         *cs->statusp = resp->status = NFS4_OK;
2686 out:
2687         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2688             LINK4res *, resp);
2689 }
2690 
2691 /*
2692  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2693  */
2694 
2695 /* ARGSUSED */
2696 static nfsstat4
2697 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2698 {
2699         int error;
2700         int different_export = 0;
2701         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2702         struct exportinfo *exi = NULL, *pre_exi = NULL;
2703         nfsstat4 stat;
2704         fid_t fid;
2705         int attrdir, dotdot, walk;
2706         bool_t is_newvp = FALSE;
2707 
2708         if (cs->vp->v_flag & V_XATTRDIR) {
2709                 attrdir = 1;
2710                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2711         } else {
2712                 attrdir = 0;
2713                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2714         }
2715 
2716         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2717 
2718         /*
2719          * If dotdotting, then need to check whether it's
2720          * above the root of a filesystem, or above an
2721          * export point.
2722          */
2723         if (dotdot) {
2724                 vnode_t *zone_rootvp;
2725 
2726                 ASSERT(cs->exi != NULL);
2727                 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2728                 /*
2729                  * If dotdotting at the root of a filesystem, then
2730                  * need to traverse back to the mounted-on filesystem
2731                  * and do the dotdot lookup there.
2732                  */
2733                 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2734 
2735                         /*
2736                          * If at the system root, then can
2737                          * go up no further.
2738                          */
2739                         if (VN_CMP(cs->vp, zone_rootvp))
2740                                 return (puterrno4(ENOENT));
2741 
2742                         /*
2743                          * Traverse back to the mounted-on filesystem
2744                          */
2745                         cs->vp = untraverse(cs->vp, zone_rootvp);
2746 
2747                         /*
2748                          * Set the different_export flag so we remember
2749                          * to pick up a new exportinfo entry for
2750                          * this new filesystem.
2751                          */
2752                         different_export = 1;
2753                 } else {
2754 
2755                         /*
2756                          * If dotdotting above an export point then set
2757                          * the different_export to get new export info.
2758                          */
2759                         different_export = nfs_exported(cs->exi, cs->vp);
2760                 }
2761         }
2762 
2763         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2764             NULL, NULL, NULL);
2765         if (error)
2766                 return (puterrno4(error));
2767 
2768         /*
2769          * If the vnode is in a pseudo filesystem, check whether it is visible.
2770          *
2771          * XXX if the vnode is a symlink and it is not visible in
2772          * a pseudo filesystem, return ENOENT (not following symlink).
2773          * V4 client can not mount such symlink. This is a regression
2774          * from V2/V3.
2775          *
2776          * In the same exported filesystem, if the security flavor used
2777          * is not an explicitly shared flavor, limit the view to the visible
2778          * list entries only. This is not a WRONGSEC case because it's already
2779          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2780          */
2781         if (!different_export &&
2782             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2783             cs->access & CS_ACCESS_LIMITED)) {
2784                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2785                         VN_RELE(vp);
2786                         return (puterrno4(ENOENT));
2787                 }
2788         }
2789 
2790         /*
2791          * If it's a mountpoint, then traverse it.
2792          */
2793         if (vn_ismntpt(vp)) {
2794                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2795                 pre_tvp = vp;           /* save pre-traversed vnode     */
2796 
2797                 /*
2798                  * hold pre_tvp to counteract rele by traverse.  We will
2799                  * need pre_tvp below if checkexport4 fails
2800                  */
2801                 VN_HOLD(pre_tvp);
2802                 if ((error = traverse(&vp)) != 0) {
2803                         VN_RELE(vp);
2804                         VN_RELE(pre_tvp);
2805                         return (puterrno4(error));
2806                 }
2807                 different_export = 1;
2808         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2809                 /*
2810                  * The vfsp comparison is to handle the case where
2811                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2812                  * and NFS is unaware of local fs transistions because
2813                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2814                  * the dir and the obj returned by lookup will have different
2815                  * vfs ptrs.
2816                  */
2817                 different_export = 1;
2818         }
2819 
2820         if (different_export) {
2821 
2822                 bzero(&fid, sizeof (fid));
2823                 fid.fid_len = MAXFIDSZ;
2824                 error = vop_fid_pseudo(vp, &fid);
2825                 if (error) {
2826                         VN_RELE(vp);
2827                         if (pre_tvp)
2828                                 VN_RELE(pre_tvp);
2829                         return (puterrno4(error));
2830                 }
2831 
2832                 if (dotdot)
2833                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2834                 else
2835                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2836 
2837                 if (exi == NULL) {
2838                         if (pre_tvp) {
2839                                 /*
2840                                  * If this vnode is a mounted-on vnode,
2841                                  * but the mounted-on file system is not
2842                                  * exported, send back the filehandle for
2843                                  * the mounted-on vnode, not the root of
2844                                  * the mounted-on file system.
2845                                  */
2846                                 VN_RELE(vp);
2847                                 vp = pre_tvp;
2848                                 exi = pre_exi;
2849                         } else {
2850                                 VN_RELE(vp);
2851                                 return (puterrno4(EACCES));
2852                         }
2853                 } else if (pre_tvp) {
2854                         /* we're done with pre_tvp now. release extra hold */
2855                         VN_RELE(pre_tvp);
2856                 }
2857 
2858                 cs->exi = exi;
2859 
2860                 /*
2861                  * Now we do a checkauth4. The reason is that
2862                  * this client/user may not have access to the new
2863                  * exported file system, and if they do,
2864                  * the client/user may be mapped to a different uid.
2865                  *
2866                  * We start with a new cr, because the checkauth4 done
2867                  * in the PUT*FH operation over wrote the cred's uid,
2868                  * gid, etc, and we want the real thing before calling
2869                  * checkauth4()
2870                  */
2871                 crfree(cs->cr);
2872                 cs->cr = crdup(cs->basecr);
2873 
2874                 oldvp = cs->vp;
2875                 cs->vp = vp;
2876                 is_newvp = TRUE;
2877 
2878                 stat = call_checkauth4(cs, req);
2879                 if (stat != NFS4_OK) {
2880                         VN_RELE(cs->vp);
2881                         cs->vp = oldvp;
2882                         return (stat);
2883                 }
2884         }
2885 
2886         /*
2887          * After various NFS checks, do a label check on the path
2888          * component. The label on this path should either be the
2889          * global zone's label or a zone's label. We are only
2890          * interested in the zone's label because exported files
2891          * in global zone is accessible (though read-only) to
2892          * clients. The exportability/visibility check is already
2893          * done before reaching this code.
2894          */
2895         if (is_system_labeled()) {
2896                 bslabel_t *clabel;
2897 
2898                 ASSERT(req->rq_label != NULL);
2899                 clabel = req->rq_label;
2900                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2901                     "got client label from request(1)", struct svc_req *, req);
2902 
2903                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2904                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2905                             cs->exi)) {
2906                                 error = EACCES;
2907                                 goto err_out;
2908                         }
2909                 } else {
2910                         /*
2911                          * We grant access to admin_low label clients
2912                          * only if the client is trusted, i.e. also
2913                          * running Solaris Trusted Extension.
2914                          */
2915                         struct sockaddr *ca;
2916                         int             addr_type;
2917                         void            *ipaddr;
2918                         tsol_tpc_t      *tp;
2919 
2920                         ca = (struct sockaddr *)svc_getrpccaller(
2921                             req->rq_xprt)->buf;
2922                         if (ca->sa_family == AF_INET) {
2923                                 addr_type = IPV4_VERSION;
2924                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2925                         } else if (ca->sa_family == AF_INET6) {
2926                                 addr_type = IPV6_VERSION;
2927                                 ipaddr = &((struct sockaddr_in6 *)
2928                                     ca)->sin6_addr;
2929                         }
2930                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2931                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2932                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2933                             SUN_CIPSO) {
2934                                 if (tp != NULL)
2935                                         TPC_RELE(tp);
2936                                 error = EACCES;
2937                                 goto err_out;
2938                         }
2939                         TPC_RELE(tp);
2940                 }
2941         }
2942 
2943         error = makefh4(&cs->fh, vp, cs->exi);
2944 
2945 err_out:
2946         if (error) {
2947                 if (is_newvp) {
2948                         VN_RELE(cs->vp);
2949                         cs->vp = oldvp;
2950                 } else
2951                         VN_RELE(vp);
2952                 return (puterrno4(error));
2953         }
2954 
2955         if (!is_newvp) {
2956                 if (cs->vp)
2957                         VN_RELE(cs->vp);
2958                 cs->vp = vp;
2959         } else if (oldvp)
2960                 VN_RELE(oldvp);
2961 
2962         /*
2963          * if did lookup on attrdir and didn't lookup .., set named
2964          * attr fh flag
2965          */
2966         if (attrdir && ! dotdot)
2967                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2968 
2969         /* Assume false for now, open proc will set this */
2970         cs->mandlock = FALSE;
2971 
2972         return (NFS4_OK);
2973 }
2974 
2975 /* ARGSUSED */
2976 static void
2977 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2978     struct compound_state *cs)
2979 {
2980         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2981         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2982         char *nm;
2983         uint_t len;
2984         struct sockaddr *ca;
2985         char *name = NULL;
2986         nfsstat4 status;
2987 
2988         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2989             LOOKUP4args *, args);
2990 
2991         if (cs->vp == NULL) {
2992                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2993                 goto out;
2994         }
2995 
2996         if (cs->vp->v_type == VLNK) {
2997                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2998                 goto out;
2999         }
3000 
3001         if (cs->vp->v_type != VDIR) {
3002                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3003                 goto out;
3004         }
3005 
3006         status = utf8_dir_verify(&args->objname);
3007         if (status != NFS4_OK) {
3008                 *cs->statusp = resp->status = status;
3009                 goto out;
3010         }
3011 
3012         nm = utf8_to_str(&args->objname, &len, NULL);
3013         if (nm == NULL) {
3014                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3015                 goto out;
3016         }
3017 
3018         if (len > MAXNAMELEN) {
3019                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3020                 kmem_free(nm, len);
3021                 goto out;
3022         }
3023 
3024         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3025         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3026             MAXPATHLEN  + 1);
3027 
3028         if (name == NULL) {
3029                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3030                 kmem_free(nm, len);
3031                 goto out;
3032         }
3033 
3034         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3035 
3036         if (name != nm)
3037                 kmem_free(name, MAXPATHLEN + 1);
3038         kmem_free(nm, len);
3039 
3040 out:
3041         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3042             LOOKUP4res *, resp);
3043 }
3044 
3045 /* ARGSUSED */
3046 static void
3047 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3048     struct compound_state *cs)
3049 {
3050         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3051 
3052         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3053 
3054         if (cs->vp == NULL) {
3055                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3056                 goto out;
3057         }
3058 
3059         if (cs->vp->v_type != VDIR) {
3060                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3061                 goto out;
3062         }
3063 
3064         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3065 
3066         /*
3067          * From NFSV4 Specification, LOOKUPP should not check for
3068          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3069          */
3070         if (resp->status == NFS4ERR_WRONGSEC) {
3071                 *cs->statusp = resp->status = NFS4_OK;
3072         }
3073 
3074 out:
3075         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3076             LOOKUPP4res *, resp);
3077 }
3078 
3079 
3080 /*ARGSUSED2*/
3081 static void
3082 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3083     struct compound_state *cs)
3084 {
3085         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3086         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3087         vnode_t         *avp = NULL;
3088         int             lookup_flags = LOOKUP_XATTR, error;
3089         int             exp_ro = 0;
3090 
3091         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3092             OPENATTR4args *, args);
3093 
3094         if (cs->vp == NULL) {
3095                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3096                 goto out;
3097         }
3098 
3099         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3100             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3101                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3102                 goto out;
3103         }
3104 
3105         /*
3106          * If file system supports passing ACE mask to VOP_ACCESS then
3107          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3108          */
3109 
3110         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3111                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3112                     V_ACE_MASK, cs->cr, NULL);
3113         else
3114                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3115                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3116                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3117 
3118         if (error) {
3119                 *cs->statusp = resp->status = puterrno4(EACCES);
3120                 goto out;
3121         }
3122 
3123         /*
3124          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3125          * the file system is exported read-only -- regardless of
3126          * createdir flag.  Otherwise the attrdir would be created
3127          * (assuming server fs isn't mounted readonly locally).  If
3128          * VOP_LOOKUP returns ENOENT in this case, the error will
3129          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3130          * because specfs has no VOP_LOOKUP op, so the macro would
3131          * return ENOSYS.  EINVAL is returned by all (current)
3132          * Solaris file system implementations when any of their
3133          * restrictions are violated (xattr(dir) can't have xattrdir).
3134          * Returning NOTSUPP is more appropriate in this case
3135          * because the object will never be able to have an attrdir.
3136          */
3137         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3138                 lookup_flags |= CREATE_XATTR_DIR;
3139 
3140         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3141             NULL, NULL, NULL);
3142 
3143         if (error) {
3144                 if (error == ENOENT && args->createdir && exp_ro)
3145                         *cs->statusp = resp->status = puterrno4(EROFS);
3146                 else if (error == EINVAL || error == ENOSYS)
3147                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3148                 else
3149                         *cs->statusp = resp->status = puterrno4(error);
3150                 goto out;
3151         }
3152 
3153         ASSERT(avp->v_flag & V_XATTRDIR);
3154 
3155         error = makefh4(&cs->fh, avp, cs->exi);
3156 
3157         if (error) {
3158                 VN_RELE(avp);
3159                 *cs->statusp = resp->status = puterrno4(error);
3160                 goto out;
3161         }
3162 
3163         VN_RELE(cs->vp);
3164         cs->vp = avp;
3165 
3166         /*
3167          * There is no requirement for an attrdir fh flag
3168          * because the attrdir has a vnode flag to distinguish
3169          * it from regular (non-xattr) directories.  The
3170          * FH4_ATTRDIR flag is set for future sanity checks.
3171          */
3172         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3173         *cs->statusp = resp->status = NFS4_OK;
3174 
3175 out:
3176         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3177             OPENATTR4res *, resp);
3178 }
3179 
3180 static int
3181 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3182     caller_context_t *ct)
3183 {
3184         int error;
3185         int i;
3186         clock_t delaytime;
3187 
3188         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3189 
3190         /*
3191          * Don't block on mandatory locks. If this routine returns
3192          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3193          */
3194         uio->uio_fmode = FNONBLOCK;
3195 
3196         for (i = 0; i < rfs4_maxlock_tries; i++) {
3197 
3198 
3199                 if (direction == FREAD) {
3200                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3201                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3202                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3203                 } else {
3204                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3205                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3206                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3207                 }
3208 
3209                 if (error != EAGAIN)
3210                         break;
3211 
3212                 if (i < rfs4_maxlock_tries - 1) {
3213                         delay(delaytime);
3214                         delaytime *= 2;
3215                 }
3216         }
3217 
3218         return (error);
3219 }
3220 
3221 /* ARGSUSED */
3222 static void
3223 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3224     struct compound_state *cs)
3225 {
3226         READ4args *args = &argop->nfs_argop4_u.opread;
3227         READ4res *resp = &resop->nfs_resop4_u.opread;
3228         int error;
3229         int verror;
3230         vnode_t *vp;
3231         struct vattr va;
3232         struct iovec iov, *iovp = NULL;
3233         int iovcnt;
3234         struct uio uio;
3235         u_offset_t offset;
3236         bool_t *deleg = &cs->deleg;
3237         nfsstat4 stat;
3238         int in_crit = 0;
3239         mblk_t *mp = NULL;
3240         int alloc_err = 0;
3241         int rdma_used = 0;
3242         int loaned_buffers;
3243         caller_context_t ct;
3244         struct uio *uiop;
3245 
3246         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3247             READ4args, args);
3248 
3249         vp = cs->vp;
3250         if (vp == NULL) {
3251                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3252                 goto out;
3253         }
3254         if (cs->access == CS_ACCESS_DENIED) {
3255                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3256                 goto out;
3257         }
3258 
3259         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3260             deleg, TRUE, &ct)) != NFS4_OK) {
3261                 *cs->statusp = resp->status = stat;
3262                 goto out;
3263         }
3264 
3265         /*
3266          * Enter the critical region before calling VOP_RWLOCK
3267          * to avoid a deadlock with write requests.
3268          */
3269         if (nbl_need_check(vp)) {
3270                 nbl_start_crit(vp, RW_READER);
3271                 in_crit = 1;
3272                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3273                     &ct)) {
3274                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3275                         goto out;
3276                 }
3277         }
3278 
3279         if (args->wlist) {
3280                 if (args->count > clist_len(args->wlist)) {
3281                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3282                         goto out;
3283                 }
3284                 rdma_used = 1;
3285         }
3286 
3287         /* use loaned buffers for TCP */
3288         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3289 
3290         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3291         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3292 
3293         /*
3294          * If we can't get the attributes, then we can't do the
3295          * right access checking.  So, we'll fail the request.
3296          */
3297         if (verror) {
3298                 *cs->statusp = resp->status = puterrno4(verror);
3299                 goto out;
3300         }
3301 
3302         if (vp->v_type != VREG) {
3303                 *cs->statusp = resp->status =
3304                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3305                 goto out;
3306         }
3307 
3308         if (crgetuid(cs->cr) != va.va_uid &&
3309             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3310             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3311                 *cs->statusp = resp->status = puterrno4(error);
3312                 goto out;
3313         }
3314 
3315         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3316                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3317                 goto out;
3318         }
3319 
3320         offset = args->offset;
3321         if (offset >= va.va_size) {
3322                 *cs->statusp = resp->status = NFS4_OK;
3323                 resp->eof = TRUE;
3324                 resp->data_len = 0;
3325                 resp->data_val = NULL;
3326                 resp->mblk = NULL;
3327                 /* RDMA */
3328                 resp->wlist = args->wlist;
3329                 resp->wlist_len = resp->data_len;
3330                 *cs->statusp = resp->status = NFS4_OK;
3331                 if (resp->wlist)
3332                         clist_zero_len(resp->wlist);
3333                 goto out;
3334         }
3335 
3336         if (args->count == 0) {
3337                 *cs->statusp = resp->status = NFS4_OK;
3338                 resp->eof = FALSE;
3339                 resp->data_len = 0;
3340                 resp->data_val = NULL;
3341                 resp->mblk = NULL;
3342                 /* RDMA */
3343                 resp->wlist = args->wlist;
3344                 resp->wlist_len = resp->data_len;
3345                 if (resp->wlist)
3346                         clist_zero_len(resp->wlist);
3347                 goto out;
3348         }
3349 
3350         /*
3351          * Do not allocate memory more than maximum allowed
3352          * transfer size
3353          */
3354         if (args->count > rfs4_tsize(req))
3355                 args->count = rfs4_tsize(req);
3356 
3357         if (loaned_buffers) {
3358                 uiop = (uio_t *)rfs_setup_xuio(vp);
3359                 ASSERT(uiop != NULL);
3360                 uiop->uio_segflg = UIO_SYSSPACE;
3361                 uiop->uio_loffset = args->offset;
3362                 uiop->uio_resid = args->count;
3363 
3364                 /* Jump to do the read if successful */
3365                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3366                         /*
3367                          * Need to hold the vnode until after VOP_RETZCBUF()
3368                          * is called.
3369                          */
3370                         VN_HOLD(vp);
3371                         goto doio_read;
3372                 }
3373 
3374                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3375                     uiop->uio_loffset, int, uiop->uio_resid);
3376 
3377                 uiop->uio_extflg = 0;
3378 
3379                 /* failure to setup for zero copy */
3380                 rfs_free_xuio((void *)uiop);
3381                 loaned_buffers = 0;
3382         }
3383 
3384         /*
3385          * If returning data via RDMA Write, then grab the chunk list. If we
3386          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3387          */
3388         if (rdma_used) {
3389                 mp = NULL;
3390                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3391                 uio.uio_iov = &iov;
3392                 uio.uio_iovcnt = 1;
3393         } else {
3394                 /*
3395                  * mp will contain the data to be sent out in the read reply.
3396                  * It will be freed after the reply has been sent.
3397                  */
3398                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3399                 ASSERT(mp != NULL);
3400                 ASSERT(alloc_err == 0);
3401                 uio.uio_iov = iovp;
3402                 uio.uio_iovcnt = iovcnt;
3403         }
3404 
3405         uio.uio_segflg = UIO_SYSSPACE;
3406         uio.uio_extflg = UIO_COPY_CACHED;
3407         uio.uio_loffset = args->offset;
3408         uio.uio_resid = args->count;
3409         uiop = &uio;
3410 
3411 doio_read:
3412         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3413 
3414         va.va_mask = AT_SIZE;
3415         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3416 
3417         if (error) {
3418                 if (mp)
3419                         freemsg(mp);
3420                 *cs->statusp = resp->status = puterrno4(error);
3421                 goto out;
3422         }
3423 
3424         /* make mblk using zc buffers */
3425         if (loaned_buffers) {
3426                 mp = uio_to_mblk(uiop);
3427                 ASSERT(mp != NULL);
3428         }
3429 
3430         *cs->statusp = resp->status = NFS4_OK;
3431 
3432         ASSERT(uiop->uio_resid >= 0);
3433         resp->data_len = args->count - uiop->uio_resid;
3434         if (mp) {
3435                 resp->data_val = (char *)mp->b_datap->db_base;
3436                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3437         } else {
3438                 resp->data_val = (caddr_t)iov.iov_base;
3439         }
3440 
3441         resp->mblk = mp;
3442 
3443         if (!verror && offset + resp->data_len == va.va_size)
3444                 resp->eof = TRUE;
3445         else
3446                 resp->eof = FALSE;
3447 
3448         if (rdma_used) {
3449                 if (!rdma_setup_read_data4(args, resp)) {
3450                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3451                 }
3452         } else {
3453                 resp->wlist = NULL;
3454         }
3455 
3456 out:
3457         if (in_crit)
3458                 nbl_end_crit(vp);
3459 
3460         if (iovp != NULL)
3461                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3462 
3463         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3464             READ4res *, resp);
3465 }
3466 
3467 static void
3468 rfs4_op_read_free(nfs_resop4 *resop)
3469 {
3470         READ4res        *resp = &resop->nfs_resop4_u.opread;
3471 
3472         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3473                 freemsg(resp->mblk);
3474                 resp->mblk = NULL;
3475                 resp->data_val = NULL;
3476                 resp->data_len = 0;
3477         }
3478 }
3479 
3480 static void
3481 rfs4_op_readdir_free(nfs_resop4 * resop)
3482 {
3483         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3484 
3485         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3486                 freeb(resp->mblk);
3487                 resp->mblk = NULL;
3488                 resp->data_len = 0;
3489         }
3490 }
3491 
3492 
3493 /* ARGSUSED */
3494 static void
3495 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3496     struct compound_state *cs)
3497 {
3498         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3499         int             error;
3500         vnode_t         *vp;
3501         struct exportinfo *exi, *sav_exi;
3502         nfs_fh4_fmt_t   *fh_fmtp;
3503         nfs_export_t *ne = nfs_get_export();
3504 
3505         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3506 
3507         if (cs->vp) {
3508                 VN_RELE(cs->vp);
3509                 cs->vp = NULL;
3510         }
3511 
3512         if (cs->cr)
3513                 crfree(cs->cr);
3514 
3515         cs->cr = crdup(cs->basecr);
3516 
3517         vp = ne->exi_public->exi_vp;
3518         if (vp == NULL) {
3519                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3520                 goto out;
3521         }
3522 
3523         error = makefh4(&cs->fh, vp, ne->exi_public);
3524         if (error != 0) {
3525                 *cs->statusp = resp->status = puterrno4(error);
3526                 goto out;
3527         }
3528         sav_exi = cs->exi;
3529         if (ne->exi_public == ne->exi_root) {
3530                 /*
3531                  * No filesystem is actually shared public, so we default
3532                  * to exi_root. In this case, we must check whether root
3533                  * is exported.
3534                  */
3535                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3536 
3537                 /*
3538                  * if root filesystem is exported, the exportinfo struct that we
3539                  * should use is what checkexport4 returns, because root_exi is
3540                  * actually a mostly empty struct.
3541                  */
3542                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3543                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3544                 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3545         } else {
3546                 /*
3547                  * it's a properly shared filesystem
3548                  */
3549                 cs->exi = ne->exi_public;
3550         }
3551 
3552         if (is_system_labeled()) {
3553                 bslabel_t *clabel;
3554 
3555                 ASSERT(req->rq_label != NULL);
3556                 clabel = req->rq_label;
3557                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3558                     "got client label from request(1)",
3559                     struct svc_req *, req);
3560                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3561                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3562                             cs->exi)) {
3563                                 *cs->statusp = resp->status =
3564                                     NFS4ERR_SERVERFAULT;
3565                                 goto out;
3566                         }
3567                 }
3568         }
3569 
3570         VN_HOLD(vp);
3571         cs->vp = vp;
3572 
3573         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3574                 VN_RELE(cs->vp);
3575                 cs->vp = NULL;
3576                 cs->exi = sav_exi;
3577                 goto out;
3578         }
3579 
3580         *cs->statusp = resp->status = NFS4_OK;
3581 out:
3582         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3583             PUTPUBFH4res *, resp);
3584 }
3585 
3586 /*
3587  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3588  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3589  * or joe have restrictive search permissions, then we shouldn't let
3590  * the client get a file handle. This is easy to enforce. However, we
3591  * don't know what security flavor should be used until we resolve the
3592  * path name. Another complication is uid mapping. If root is
3593  * the user, then it will be mapped to the anonymous user by default,
3594  * but we won't know that till we've resolved the path name. And we won't
3595  * know what the anonymous user is.
3596  * Luckily, SECINFO is specified to take a full filename.
3597  * So what we will have to in rfs4_op_lookup is check that flavor of
3598  * the target object matches that of the request, and if root was the
3599  * caller, check for the root= and anon= options, and if necessary,
3600  * repeat the lookup using the right cred_t. But that's not done yet.
3601  */
3602 /* ARGSUSED */
3603 static void
3604 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3605     struct compound_state *cs)
3606 {
3607         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3608         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3609         nfs_fh4_fmt_t *fh_fmtp;
3610 
3611         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3612             PUTFH4args *, args);
3613 
3614         if (cs->vp) {
3615                 VN_RELE(cs->vp);
3616                 cs->vp = NULL;
3617         }
3618 
3619         if (cs->cr) {
3620                 crfree(cs->cr);
3621                 cs->cr = NULL;
3622         }
3623 
3624 
3625         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3626                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3627                 goto out;
3628         }
3629 
3630         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3631         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3632             NULL);
3633 
3634         if (cs->exi == NULL) {
3635                 *cs->statusp = resp->status = NFS4ERR_STALE;
3636                 goto out;
3637         }
3638 
3639         cs->cr = crdup(cs->basecr);
3640 
3641         ASSERT(cs->cr != NULL);
3642 
3643         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3644                 *cs->statusp = resp->status;
3645                 goto out;
3646         }
3647 
3648         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3649                 VN_RELE(cs->vp);
3650                 cs->vp = NULL;
3651                 goto out;
3652         }
3653 
3654         nfs_fh4_copy(&args->object, &cs->fh);
3655         *cs->statusp = resp->status = NFS4_OK;
3656         cs->deleg = FALSE;
3657 
3658 out:
3659         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3660             PUTFH4res *, resp);
3661 }
3662 
3663 /* ARGSUSED */
3664 static void
3665 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3666     struct compound_state *cs)
3667 {
3668         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3669         int error;
3670         fid_t fid;
3671         struct exportinfo *exi, *sav_exi;
3672 
3673         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3674 
3675         if (cs->vp) {
3676                 VN_RELE(cs->vp);
3677                 cs->vp = NULL;
3678         }
3679 
3680         if (cs->cr)
3681                 crfree(cs->cr);
3682 
3683         cs->cr = crdup(cs->basecr);
3684 
3685         /*
3686          * Using rootdir, the system root vnode,
3687          * get its fid.
3688          */
3689         bzero(&fid, sizeof (fid));
3690         fid.fid_len = MAXFIDSZ;
3691         error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3692         if (error != 0) {
3693                 *cs->statusp = resp->status = puterrno4(error);
3694                 goto out;
3695         }
3696 
3697         /*
3698          * Then use the root fsid & fid it to find out if it's exported
3699          *
3700          * If the server root isn't exported directly, then
3701          * it should at least be a pseudo export based on
3702          * one or more exports further down in the server's
3703          * file tree.
3704          */
3705         exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3706         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3707                 NFS4_DEBUG(rfs4_debug,
3708                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3709                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3710                 goto out;
3711         }
3712 
3713         /*
3714          * Now make a filehandle based on the root
3715          * export and root vnode.
3716          */
3717         error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3718         if (error != 0) {
3719                 *cs->statusp = resp->status = puterrno4(error);
3720                 goto out;
3721         }
3722 
3723         sav_exi = cs->exi;
3724         cs->exi = exi;
3725 
3726         VN_HOLD(ZONE_ROOTVP());
3727         cs->vp = ZONE_ROOTVP();
3728 
3729         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3730                 VN_RELE(cs->vp);
3731                 cs->vp = NULL;
3732                 cs->exi = sav_exi;
3733                 goto out;
3734         }
3735 
3736         *cs->statusp = resp->status = NFS4_OK;
3737         cs->deleg = FALSE;
3738 out:
3739         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3740             PUTROOTFH4res *, resp);
3741 }
3742 
3743 /*
3744  * readlink: args: CURRENT_FH.
3745  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3746  */
3747 
3748 /* ARGSUSED */
3749 static void
3750 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3751     struct compound_state *cs)
3752 {
3753         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3754         int error;
3755         vnode_t *vp;
3756         struct iovec iov;
3757         struct vattr va;
3758         struct uio uio;
3759         char *data;
3760         struct sockaddr *ca;
3761         char *name = NULL;
3762         int is_referral;
3763 
3764         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3765 
3766         /* CURRENT_FH: directory */
3767         vp = cs->vp;
3768         if (vp == NULL) {
3769                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3770                 goto out;
3771         }
3772 
3773         if (cs->access == CS_ACCESS_DENIED) {
3774                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3775                 goto out;
3776         }
3777 
3778         /* Is it a referral? */
3779         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3780 
3781                 is_referral = 1;
3782 
3783         } else {
3784 
3785                 is_referral = 0;
3786 
3787                 if (vp->v_type == VDIR) {
3788                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3789                         goto out;
3790                 }
3791 
3792                 if (vp->v_type != VLNK) {
3793                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3794                         goto out;
3795                 }
3796 
3797         }
3798 
3799         va.va_mask = AT_MODE;
3800         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3801         if (error) {
3802                 *cs->statusp = resp->status = puterrno4(error);
3803                 goto out;
3804         }
3805 
3806         if (MANDLOCK(vp, va.va_mode)) {
3807                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3808                 goto out;
3809         }
3810 
3811         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3812 
3813         if (is_referral) {
3814                 char *s;
3815                 size_t strsz;
3816                 kstat_named_t *stat =
3817                     cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3818 
3819                 /* Get an artificial symlink based on a referral */
3820                 s = build_symlink(vp, cs->cr, &strsz);
3821                 stat[NFS_REFERLINKS].value.ui64++;
3822                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3823                     vnode_t *, vp, char *, s);
3824                 if (s == NULL)
3825                         error = EINVAL;
3826                 else {
3827                         error = 0;
3828                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3829                         kmem_free(s, strsz);
3830                 }
3831 
3832         } else {
3833 
3834                 iov.iov_base = data;
3835                 iov.iov_len = MAXPATHLEN;
3836                 uio.uio_iov = &iov;
3837                 uio.uio_iovcnt = 1;
3838                 uio.uio_segflg = UIO_SYSSPACE;
3839                 uio.uio_extflg = UIO_COPY_CACHED;
3840                 uio.uio_loffset = 0;
3841                 uio.uio_resid = MAXPATHLEN;
3842 
3843                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3844 
3845                 if (!error)
3846                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3847         }
3848 
3849         if (error) {
3850                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3851                 *cs->statusp = resp->status = puterrno4(error);
3852                 goto out;
3853         }
3854 
3855         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3856         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3857             MAXPATHLEN  + 1);
3858 
3859         if (name == NULL) {
3860                 /*
3861                  * Even though the conversion failed, we return
3862                  * something. We just don't translate it.
3863                  */
3864                 name = data;
3865         }
3866 
3867         /*
3868          * treat link name as data
3869          */
3870         (void) str_to_utf8(name, (utf8string *)&resp->link);
3871 
3872         if (name != data)
3873                 kmem_free(name, MAXPATHLEN + 1);
3874         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3875         *cs->statusp = resp->status = NFS4_OK;
3876 
3877 out:
3878         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3879             READLINK4res *, resp);
3880 }
3881 
3882 static void
3883 rfs4_op_readlink_free(nfs_resop4 *resop)
3884 {
3885         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3886         utf8string *symlink = (utf8string *)&resp->link;
3887 
3888         if (symlink->utf8string_val) {
3889                 UTF8STRING_FREE(*symlink)
3890         }
3891 }
3892 
3893 /*
3894  * release_lockowner:
3895  *      Release any state associated with the supplied
3896  *      lockowner. Note if any lo_state is holding locks we will not
3897  *      rele that lo_state and thus the lockowner will not be destroyed.
3898  *      A client using lock after the lock owner stateid has been released
3899  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3900  *      to reissue the lock with new_lock_owner set to TRUE.
3901  *      args: lock_owner
3902  *      res:  status
3903  */
3904 /* ARGSUSED */
3905 static void
3906 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3907     struct svc_req *req, struct compound_state *cs)
3908 {
3909         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3910         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3911         rfs4_lockowner_t *lo;
3912         rfs4_openowner_t *oo;
3913         rfs4_state_t *sp;
3914         rfs4_lo_state_t *lsp;
3915         rfs4_client_t *cp;
3916         bool_t create = FALSE;
3917         locklist_t *llist;
3918         sysid_t sysid;
3919 
3920         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3921             cs, RELEASE_LOCKOWNER4args *, ap);
3922 
3923         /* Make sure there is a clientid around for this request */
3924         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3925 
3926         if (cp == NULL) {
3927                 *cs->statusp = resp->status =
3928                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3929                 goto out;
3930         }
3931         rfs4_client_rele(cp);
3932 
3933         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3934         if (lo == NULL) {
3935                 *cs->statusp = resp->status = NFS4_OK;
3936                 goto out;
3937         }
3938         ASSERT(lo->rl_client != NULL);
3939 
3940         /*
3941          * Check for EXPIRED client. If so will reap state with in a lease
3942          * period or on next set_clientid_confirm step
3943          */
3944         if (rfs4_lease_expired(lo->rl_client)) {
3945                 rfs4_lockowner_rele(lo);
3946                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3947                 goto out;
3948         }
3949 
3950         /*
3951          * If no sysid has been assigned, then no locks exist; just return.
3952          */
3953         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3954         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3955                 rfs4_lockowner_rele(lo);
3956                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3957                 goto out;
3958         }
3959 
3960         sysid = lo->rl_client->rc_sysidt;
3961         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3962 
3963         /*
3964          * Mark the lockowner invalid.
3965          */
3966         rfs4_dbe_hide(lo->rl_dbe);
3967 
3968         /*
3969          * sysid-pid pair should now not be used since the lockowner is
3970          * invalid. If the client were to instantiate the lockowner again
3971          * it would be assigned a new pid. Thus we can get the list of
3972          * current locks.
3973          */
3974 
3975         llist = flk_get_active_locks(sysid, lo->rl_pid);
3976         /* If we are still holding locks fail */
3977         if (llist != NULL) {
3978 
3979                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3980 
3981                 flk_free_locklist(llist);
3982                 /*
3983                  * We need to unhide the lockowner so the client can
3984                  * try it again. The bad thing here is if the client
3985                  * has a logic error that took it here in the first place
3986                  * they probably have lost accounting of the locks that it
3987                  * is holding. So we may have dangling state until the
3988                  * open owner state is reaped via close. One scenario
3989                  * that could possibly occur is that the client has
3990                  * sent the unlock request(s) in separate threads
3991                  * and has not waited for the replies before sending the
3992                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3993                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3994                  * reissuing the request.
3995                  */
3996                 rfs4_dbe_unhide(lo->rl_dbe);
3997                 rfs4_lockowner_rele(lo);
3998                 goto out;
3999         }
4000 
4001         /*
4002          * For the corresponding client we need to check each open
4003          * owner for any opens that have lockowner state associated
4004          * with this lockowner.
4005          */
4006 
4007         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4008         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4009             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4010 
4011                 rfs4_dbe_lock(oo->ro_dbe);
4012                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4013                     sp = list_next(&oo->ro_statelist, sp)) {
4014 
4015                         rfs4_dbe_lock(sp->rs_dbe);
4016                         for (lsp = list_head(&sp->rs_lostatelist);
4017                             lsp != NULL;
4018                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4019                                 if (lsp->rls_locker == lo) {
4020                                         rfs4_dbe_lock(lsp->rls_dbe);
4021                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4022                                         rfs4_dbe_unlock(lsp->rls_dbe);
4023                                 }
4024                         }
4025                         rfs4_dbe_unlock(sp->rs_dbe);
4026                 }
4027                 rfs4_dbe_unlock(oo->ro_dbe);
4028         }
4029         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4030 
4031         rfs4_lockowner_rele(lo);
4032 
4033         *cs->statusp = resp->status = NFS4_OK;
4034 
4035 out:
4036         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4037             cs, RELEASE_LOCKOWNER4res *, resp);
4038 }
4039 
4040 /*
4041  * short utility function to lookup a file and recall the delegation
4042  */
4043 static rfs4_file_t *
4044 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4045     int *lkup_error, cred_t *cr)
4046 {
4047         vnode_t *vp;
4048         rfs4_file_t *fp = NULL;
4049         bool_t fcreate = FALSE;
4050         int error;
4051 
4052         if (vpp)
4053                 *vpp = NULL;
4054 
4055         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4056             NULL)) == 0) {
4057                 if (vp->v_type == VREG)
4058                         fp = rfs4_findfile(vp, NULL, &fcreate);
4059                 if (vpp)
4060                         *vpp = vp;
4061                 else
4062                         VN_RELE(vp);
4063         }
4064 
4065         if (lkup_error)
4066                 *lkup_error = error;
4067 
4068         return (fp);
4069 }
4070 
4071 /*
4072  * remove: args: CURRENT_FH: directory; name.
4073  *      res: status. If success - CURRENT_FH unchanged, return change_info
4074  *              for directory.
4075  */
4076 /* ARGSUSED */
4077 static void
4078 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4079     struct compound_state *cs)
4080 {
4081         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4082         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4083         int error;
4084         vnode_t *dvp, *vp;
4085         struct vattr bdva, idva, adva;
4086         char *nm;
4087         uint_t len;
4088         rfs4_file_t *fp;
4089         int in_crit = 0;
4090         bslabel_t *clabel;
4091         struct sockaddr *ca;
4092         char *name = NULL;
4093         nfsstat4 status;
4094 
4095         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4096             REMOVE4args *, args);
4097 
4098         /* CURRENT_FH: directory */
4099         dvp = cs->vp;
4100         if (dvp == NULL) {
4101                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4102                 goto out;
4103         }
4104 
4105         if (cs->access == CS_ACCESS_DENIED) {
4106                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4107                 goto out;
4108         }
4109 
4110         /*
4111          * If there is an unshared filesystem mounted on this vnode,
4112          * Do not allow to remove anything in this directory.
4113          */
4114         if (vn_ismntpt(dvp)) {
4115                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4116                 goto out;
4117         }
4118 
4119         if (dvp->v_type != VDIR) {
4120                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4121                 goto out;
4122         }
4123 
4124         status = utf8_dir_verify(&args->target);
4125         if (status != NFS4_OK) {
4126                 *cs->statusp = resp->status = status;
4127                 goto out;
4128         }
4129 
4130         /*
4131          * Lookup the file so that we can check if it's a directory
4132          */
4133         nm = utf8_to_fn(&args->target, &len, NULL);
4134         if (nm == NULL) {
4135                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4136                 goto out;
4137         }
4138 
4139         if (len > MAXNAMELEN) {
4140                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4141                 kmem_free(nm, len);
4142                 goto out;
4143         }
4144 
4145         if (rdonly4(req, cs)) {
4146                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4147                 kmem_free(nm, len);
4148                 goto out;
4149         }
4150 
4151         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4152         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4153             MAXPATHLEN  + 1);
4154 
4155         if (name == NULL) {
4156                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4157                 kmem_free(nm, len);
4158                 goto out;
4159         }
4160 
4161         /*
4162          * Lookup the file to determine type and while we are see if
4163          * there is a file struct around and check for delegation.
4164          * We don't need to acquire va_seq before this lookup, if
4165          * it causes an update, cinfo.before will not match, which will
4166          * trigger a cache flush even if atomic is TRUE.
4167          */
4168         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4169                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4170                     NULL)) {
4171                         VN_RELE(vp);
4172                         rfs4_file_rele(fp);
4173                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4174                         if (nm != name)
4175                                 kmem_free(name, MAXPATHLEN + 1);
4176                         kmem_free(nm, len);
4177                         goto out;
4178                 }
4179         }
4180 
4181         /* Didn't find anything to remove */
4182         if (vp == NULL) {
4183                 *cs->statusp = resp->status = error;
4184                 if (nm != name)
4185                         kmem_free(name, MAXPATHLEN + 1);
4186                 kmem_free(nm, len);
4187                 goto out;
4188         }
4189 
4190         if (nbl_need_check(vp)) {
4191                 nbl_start_crit(vp, RW_READER);
4192                 in_crit = 1;
4193                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4194                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4195                         if (nm != name)
4196                                 kmem_free(name, MAXPATHLEN + 1);
4197                         kmem_free(nm, len);
4198                         nbl_end_crit(vp);
4199                         VN_RELE(vp);
4200                         if (fp) {
4201                                 rfs4_clear_dont_grant(fp);
4202                                 rfs4_file_rele(fp);
4203                         }
4204                         goto out;
4205                 }
4206         }
4207 
4208         /* check label before allowing removal */
4209         if (is_system_labeled()) {
4210                 ASSERT(req->rq_label != NULL);
4211                 clabel = req->rq_label;
4212                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4213                     "got client label from request(1)",
4214                     struct svc_req *, req);
4215                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4216                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4217                             cs->exi)) {
4218                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4219                                 if (name != nm)
4220                                         kmem_free(name, MAXPATHLEN + 1);
4221                                 kmem_free(nm, len);
4222                                 if (in_crit)
4223                                         nbl_end_crit(vp);
4224                                 VN_RELE(vp);
4225                                 if (fp) {
4226                                         rfs4_clear_dont_grant(fp);
4227                                         rfs4_file_rele(fp);
4228                                 }
4229                                 goto out;
4230                         }
4231                 }
4232         }
4233 
4234         /* Get dir "before" change value */
4235         bdva.va_mask = AT_CTIME|AT_SEQ;
4236         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4237         if (error) {
4238                 *cs->statusp = resp->status = puterrno4(error);
4239                 if (nm != name)
4240                         kmem_free(name, MAXPATHLEN + 1);
4241                 kmem_free(nm, len);
4242                 if (in_crit)
4243                         nbl_end_crit(vp);
4244                 VN_RELE(vp);
4245                 if (fp) {
4246                         rfs4_clear_dont_grant(fp);
4247                         rfs4_file_rele(fp);
4248                 }
4249                 goto out;
4250         }
4251         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4252 
4253         /* Actually do the REMOVE operation */
4254         if (vp->v_type == VDIR) {
4255                 /*
4256                  * Can't remove a directory that has a mounted-on filesystem.
4257                  */
4258                 if (vn_ismntpt(vp)) {
4259                         error = EACCES;
4260                 } else {
4261                         /*
4262                          * System V defines rmdir to return EEXIST,
4263                          * not ENOTEMPTY, if the directory is not
4264                          * empty.  A System V NFS server needs to map
4265                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4266                          * transmit over the wire.
4267                          */
4268                         if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4269                             NULL, 0)) == EEXIST)
4270                                 error = ENOTEMPTY;
4271                 }
4272         } else {
4273                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4274                     fp != NULL) {
4275                         struct vattr va;
4276                         vnode_t *tvp;
4277 
4278                         rfs4_dbe_lock(fp->rf_dbe);
4279                         tvp = fp->rf_vp;
4280                         if (tvp)
4281                                 VN_HOLD(tvp);
4282                         rfs4_dbe_unlock(fp->rf_dbe);
4283 
4284                         if (tvp) {
4285                                 /*
4286                                  * This is va_seq safe because we are not
4287                                  * manipulating dvp.
4288                                  */
4289                                 va.va_mask = AT_NLINK;
4290                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4291                                     va.va_nlink == 0) {
4292                                         /* Remove state on file remove */
4293                                         if (in_crit) {
4294                                                 nbl_end_crit(vp);
4295                                                 in_crit = 0;
4296                                         }
4297                                         rfs4_close_all_state(fp);
4298                                 }
4299                                 VN_RELE(tvp);
4300                         }
4301                 }
4302         }
4303 
4304         if (in_crit)
4305                 nbl_end_crit(vp);
4306         VN_RELE(vp);
4307 
4308         if (fp) {
4309                 rfs4_clear_dont_grant(fp);
4310                 rfs4_file_rele(fp);
4311         }
4312         if (nm != name)
4313                 kmem_free(name, MAXPATHLEN + 1);
4314         kmem_free(nm, len);
4315 
4316         if (error) {
4317                 *cs->statusp = resp->status = puterrno4(error);
4318                 goto out;
4319         }
4320 
4321         /*
4322          * Get the initial "after" sequence number, if it fails, set to zero
4323          */
4324         idva.va_mask = AT_SEQ;
4325         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4326                 idva.va_seq = 0;
4327 
4328         /*
4329          * Force modified data and metadata out to stable storage.
4330          */
4331         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4332 
4333         /*
4334          * Get "after" change value, if it fails, simply return the
4335          * before value.
4336          */
4337         adva.va_mask = AT_CTIME|AT_SEQ;
4338         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4339                 adva.va_ctime = bdva.va_ctime;
4340                 adva.va_seq = 0;
4341         }
4342 
4343         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4344 
4345         /*
4346          * The cinfo.atomic = TRUE only if we have
4347          * non-zero va_seq's, and it has incremented by exactly one
4348          * during the VOP_REMOVE/RMDIR and it didn't change during
4349          * the VOP_FSYNC.
4350          */
4351         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4352             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4353                 resp->cinfo.atomic = TRUE;
4354         else
4355                 resp->cinfo.atomic = FALSE;
4356 
4357         *cs->statusp = resp->status = NFS4_OK;
4358 
4359 out:
4360         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4361             REMOVE4res *, resp);
4362 }
4363 
4364 /*
4365  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4366  *              oldname and newname.
4367  *      res: status. If success - CURRENT_FH unchanged, return change_info
4368  *              for both from and target directories.
4369  */
4370 /* ARGSUSED */
4371 static void
4372 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4373     struct compound_state *cs)
4374 {
4375         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4376         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4377         int error;
4378         vnode_t *odvp;
4379         vnode_t *ndvp;
4380         vnode_t *srcvp, *targvp, *tvp;
4381         struct vattr obdva, oidva, oadva;
4382         struct vattr nbdva, nidva, nadva;
4383         char *onm, *nnm;
4384         uint_t olen, nlen;
4385         rfs4_file_t *fp, *sfp;
4386         int in_crit_src, in_crit_targ;
4387         int fp_rele_grant_hold, sfp_rele_grant_hold;
4388         int unlinked;
4389         bslabel_t *clabel;
4390         struct sockaddr *ca;
4391         char *converted_onm = NULL;
4392         char *converted_nnm = NULL;
4393         nfsstat4 status;
4394 
4395         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4396             RENAME4args *, args);
4397 
4398         fp = sfp = NULL;
4399         srcvp = targvp = tvp = NULL;
4400         in_crit_src = in_crit_targ = 0;
4401         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4402         unlinked = 0;
4403 
4404         /* CURRENT_FH: target directory */
4405         ndvp = cs->vp;
4406         if (ndvp == NULL) {
4407                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4408                 goto out;
4409         }
4410 
4411         /* SAVED_FH: from directory */
4412         odvp = cs->saved_vp;
4413         if (odvp == NULL) {
4414                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4415                 goto out;
4416         }
4417 
4418         if (cs->access == CS_ACCESS_DENIED) {
4419                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4420                 goto out;
4421         }
4422 
4423         /*
4424          * If there is an unshared filesystem mounted on this vnode,
4425          * do not allow to rename objects in this directory.
4426          */
4427         if (vn_ismntpt(odvp)) {
4428                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4429                 goto out;
4430         }
4431 
4432         /*
4433          * If there is an unshared filesystem mounted on this vnode,
4434          * do not allow to rename to this directory.
4435          */
4436         if (vn_ismntpt(ndvp)) {
4437                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4438                 goto out;
4439         }
4440 
4441         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4442                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4443                 goto out;
4444         }
4445 
4446         if (cs->saved_exi != cs->exi) {
4447                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4448                 goto out;
4449         }
4450 
4451         status = utf8_dir_verify(&args->oldname);
4452         if (status != NFS4_OK) {
4453                 *cs->statusp = resp->status = status;
4454                 goto out;
4455         }
4456 
4457         status = utf8_dir_verify(&args->newname);
4458         if (status != NFS4_OK) {
4459                 *cs->statusp = resp->status = status;
4460                 goto out;
4461         }
4462 
4463         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4464         if (onm == NULL) {
4465                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4466                 goto out;
4467         }
4468         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4469         nlen = MAXPATHLEN + 1;
4470         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4471             nlen);
4472 
4473         if (converted_onm == NULL) {
4474                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4475                 kmem_free(onm, olen);
4476                 goto out;
4477         }
4478 
4479         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4480         if (nnm == NULL) {
4481                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4482                 if (onm != converted_onm)
4483                         kmem_free(converted_onm, MAXPATHLEN + 1);
4484                 kmem_free(onm, olen);
4485                 goto out;
4486         }
4487         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4488             MAXPATHLEN  + 1);
4489 
4490         if (converted_nnm == NULL) {
4491                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4492                 kmem_free(nnm, nlen);
4493                 nnm = NULL;
4494                 if (onm != converted_onm)
4495                         kmem_free(converted_onm, MAXPATHLEN + 1);
4496                 kmem_free(onm, olen);
4497                 goto out;
4498         }
4499 
4500 
4501         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4502                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4503                 kmem_free(onm, olen);
4504                 kmem_free(nnm, nlen);
4505                 goto out;
4506         }
4507 
4508 
4509         if (rdonly4(req, cs)) {
4510                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4511                 if (onm != converted_onm)
4512                         kmem_free(converted_onm, MAXPATHLEN + 1);
4513                 kmem_free(onm, olen);
4514                 if (nnm != converted_nnm)
4515                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4516                 kmem_free(nnm, nlen);
4517                 goto out;
4518         }
4519 
4520         /* check label of the target dir */
4521         if (is_system_labeled()) {
4522                 ASSERT(req->rq_label != NULL);
4523                 clabel = req->rq_label;
4524                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4525                     "got client label from request(1)",
4526                     struct svc_req *, req);
4527                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4528                         if (!do_rfs_label_check(clabel, ndvp,
4529                             EQUALITY_CHECK, cs->exi)) {
4530                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4531                                 goto err_out;
4532                         }
4533                 }
4534         }
4535 
4536         /*
4537          * Is the source a file and have a delegation?
4538          * We don't need to acquire va_seq before these lookups, if
4539          * it causes an update, cinfo.before will not match, which will
4540          * trigger a cache flush even if atomic is TRUE.
4541          */
4542         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4543             &error, cs->cr)) {
4544                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4545                     NULL)) {
4546                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4547                         goto err_out;
4548                 }
4549         }
4550 
4551         if (srcvp == NULL) {
4552                 *cs->statusp = resp->status = puterrno4(error);
4553                 if (onm != converted_onm)
4554                         kmem_free(converted_onm, MAXPATHLEN + 1);
4555                 kmem_free(onm, olen);
4556                 if (nnm != converted_nnm)
4557                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4558                 kmem_free(nnm, nlen);
4559                 goto out;
4560         }
4561 
4562         sfp_rele_grant_hold = 1;
4563 
4564         /* Does the destination exist and a file and have a delegation? */
4565         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4566             NULL, cs->cr)) {
4567                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4568                     NULL)) {
4569                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4570                         goto err_out;
4571                 }
4572         }
4573         fp_rele_grant_hold = 1;
4574 
4575         /* Check for NBMAND lock on both source and target */
4576         if (nbl_need_check(srcvp)) {
4577                 nbl_start_crit(srcvp, RW_READER);
4578                 in_crit_src = 1;
4579                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4580                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4581                         goto err_out;
4582                 }
4583         }
4584 
4585         if (targvp && nbl_need_check(targvp)) {
4586                 nbl_start_crit(targvp, RW_READER);
4587                 in_crit_targ = 1;
4588                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4589                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4590                         goto err_out;
4591                 }
4592         }
4593 
4594         /* Get source "before" change value */
4595         obdva.va_mask = AT_CTIME|AT_SEQ;
4596         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4597         if (!error) {
4598                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4599                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4600         }
4601         if (error) {
4602                 *cs->statusp = resp->status = puterrno4(error);
4603                 goto err_out;
4604         }
4605 
4606         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4607         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4608 
4609         error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4610             NULL, 0);
4611 
4612         /*
4613          * If target existed and was unlinked by VOP_RENAME, state will need
4614          * closed. To avoid deadlock, rfs4_close_all_state will be done after
4615          * any necessary nbl_end_crit on srcvp and tgtvp.
4616          */
4617         if (error == 0 && fp != NULL) {
4618                 rfs4_dbe_lock(fp->rf_dbe);
4619                 tvp = fp->rf_vp;
4620                 if (tvp)
4621                         VN_HOLD(tvp);
4622                 rfs4_dbe_unlock(fp->rf_dbe);
4623 
4624                 if (tvp) {
4625                         struct vattr va;
4626                         va.va_mask = AT_NLINK;
4627 
4628                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4629                             va.va_nlink == 0) {
4630                                 unlinked = 1;
4631 
4632                                 /* DEBUG data */
4633                                 if ((srcvp == targvp) || (tvp != targvp)) {
4634                                         cmn_err(CE_WARN, "rfs4_op_rename: "
4635                                             "srcvp %p, targvp: %p, tvp: %p",
4636                                             (void *)srcvp, (void *)targvp,
4637                                             (void *)tvp);
4638                                 }
4639                         } else {
4640                                 VN_RELE(tvp);
4641                         }
4642                 }
4643         }
4644         if (error == 0)
4645                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4646 
4647         if (in_crit_src)
4648                 nbl_end_crit(srcvp);
4649         if (srcvp)
4650                 VN_RELE(srcvp);
4651         if (in_crit_targ)
4652                 nbl_end_crit(targvp);
4653         if (targvp)
4654                 VN_RELE(targvp);
4655 
4656         if (unlinked) {
4657                 ASSERT(fp != NULL);
4658                 ASSERT(tvp != NULL);
4659 
4660                 /* DEBUG data */
4661                 if (RW_READ_HELD(&tvp->v_nbllock)) {
4662                         cmn_err(CE_WARN, "rfs4_op_rename: "
4663                             "RW_READ_HELD(%p)", (void *)tvp);
4664                 }
4665 
4666                 /* The file is gone and so should the state */
4667                 rfs4_close_all_state(fp);
4668                 VN_RELE(tvp);
4669         }
4670 
4671         if (sfp) {
4672                 rfs4_clear_dont_grant(sfp);
4673                 rfs4_file_rele(sfp);
4674         }
4675         if (fp) {
4676                 rfs4_clear_dont_grant(fp);
4677                 rfs4_file_rele(fp);
4678         }
4679 
4680         if (converted_onm != onm)
4681                 kmem_free(converted_onm, MAXPATHLEN + 1);
4682         kmem_free(onm, olen);
4683         if (converted_nnm != nnm)
4684                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4685         kmem_free(nnm, nlen);
4686 
4687         /*
4688          * Get the initial "after" sequence number, if it fails, set to zero
4689          */
4690         oidva.va_mask = AT_SEQ;
4691         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4692                 oidva.va_seq = 0;
4693 
4694         nidva.va_mask = AT_SEQ;
4695         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4696                 nidva.va_seq = 0;
4697 
4698         /*
4699          * Force modified data and metadata out to stable storage.
4700          */
4701         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4702         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4703 
4704         if (error) {
4705                 *cs->statusp = resp->status = puterrno4(error);
4706                 goto out;
4707         }
4708 
4709         /*
4710          * Get "after" change values, if it fails, simply return the
4711          * before value.
4712          */
4713         oadva.va_mask = AT_CTIME|AT_SEQ;
4714         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4715                 oadva.va_ctime = obdva.va_ctime;
4716                 oadva.va_seq = 0;
4717         }
4718 
4719         nadva.va_mask = AT_CTIME|AT_SEQ;
4720         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4721                 nadva.va_ctime = nbdva.va_ctime;
4722                 nadva.va_seq = 0;
4723         }
4724 
4725         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4726         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4727 
4728         /*
4729          * The cinfo.atomic = TRUE only if we have
4730          * non-zero va_seq's, and it has incremented by exactly one
4731          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4732          */
4733         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4734             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4735                 resp->source_cinfo.atomic = TRUE;
4736         else
4737                 resp->source_cinfo.atomic = FALSE;
4738 
4739         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4740             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4741                 resp->target_cinfo.atomic = TRUE;
4742         else
4743                 resp->target_cinfo.atomic = FALSE;
4744 
4745 #ifdef  VOLATILE_FH_TEST
4746         {
4747         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4748 
4749         /*
4750          * Add the renamed file handle to the volatile rename list
4751          */
4752         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4753                 /* file handles may expire on rename */
4754                 vnode_t *vp;
4755 
4756                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4757                 /*
4758                  * Already know that nnm will be a valid string
4759                  */
4760                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4761                     NULL, NULL, NULL);
4762                 kmem_free(nnm, nlen);
4763                 if (!error) {
4764                         add_volrnm_fh(cs->exi, vp);
4765                         VN_RELE(vp);
4766                 }
4767         }
4768         }
4769 #endif  /* VOLATILE_FH_TEST */
4770 
4771         *cs->statusp = resp->status = NFS4_OK;
4772 out:
4773         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4774             RENAME4res *, resp);
4775         return;
4776 
4777 err_out:
4778         if (onm != converted_onm)
4779                 kmem_free(converted_onm, MAXPATHLEN + 1);
4780         if (onm != NULL)
4781                 kmem_free(onm, olen);
4782         if (nnm != converted_nnm)
4783                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4784         if (nnm != NULL)
4785                 kmem_free(nnm, nlen);
4786 
4787         if (in_crit_src) nbl_end_crit(srcvp);
4788         if (in_crit_targ) nbl_end_crit(targvp);
4789         if (targvp) VN_RELE(targvp);
4790         if (srcvp) VN_RELE(srcvp);
4791         if (sfp) {
4792                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4793                 rfs4_file_rele(sfp);
4794         }
4795         if (fp) {
4796                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4797                 rfs4_file_rele(fp);
4798         }
4799 
4800         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4801             RENAME4res *, resp);
4802 }
4803 
4804 /* ARGSUSED */
4805 static void
4806 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4807     struct compound_state *cs)
4808 {
4809         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4810         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4811         rfs4_client_t *cp;
4812 
4813         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4814             RENEW4args *, args);
4815 
4816         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4817                 *cs->statusp = resp->status =
4818                     rfs4_check_clientid(&args->clientid, 0);
4819                 goto out;
4820         }
4821 
4822         if (rfs4_lease_expired(cp)) {
4823                 rfs4_client_rele(cp);
4824                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4825                 goto out;
4826         }
4827 
4828         rfs4_update_lease(cp);
4829 
4830         mutex_enter(cp->rc_cbinfo.cb_lock);
4831         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4832                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4833                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4834         } else {
4835                 *cs->statusp = resp->status = NFS4_OK;
4836         }
4837         mutex_exit(cp->rc_cbinfo.cb_lock);
4838 
4839         rfs4_client_rele(cp);
4840 
4841 out:
4842         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4843             RENEW4res *, resp);
4844 }
4845 
4846 /* ARGSUSED */
4847 static void
4848 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4849     struct compound_state *cs)
4850 {
4851         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4852 
4853         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4854 
4855         /* No need to check cs->access - we are not accessing any object */
4856         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4857                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4858                 goto out;
4859         }
4860         if (cs->vp != NULL) {
4861                 VN_RELE(cs->vp);
4862         }
4863         cs->vp = cs->saved_vp;
4864         cs->saved_vp = NULL;
4865         cs->exi = cs->saved_exi;
4866         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4867         *cs->statusp = resp->status = NFS4_OK;
4868         cs->deleg = FALSE;
4869 
4870 out:
4871         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4872             RESTOREFH4res *, resp);
4873 }
4874 
4875 /* ARGSUSED */
4876 static void
4877 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4878     struct compound_state *cs)
4879 {
4880         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4881 
4882         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4883 
4884         /* No need to check cs->access - we are not accessing any object */
4885         if (cs->vp == NULL) {
4886                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4887                 goto out;
4888         }
4889         if (cs->saved_vp != NULL) {
4890                 VN_RELE(cs->saved_vp);
4891         }
4892         cs->saved_vp = cs->vp;
4893         VN_HOLD(cs->saved_vp);
4894         cs->saved_exi = cs->exi;
4895         /*
4896          * since SAVEFH is fairly rare, don't alloc space for its fh
4897          * unless necessary.
4898          */
4899         if (cs->saved_fh.nfs_fh4_val == NULL) {
4900                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4901         }
4902         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4903         *cs->statusp = resp->status = NFS4_OK;
4904 
4905 out:
4906         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4907             SAVEFH4res *, resp);
4908 }
4909 
4910 /*
4911  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4912  * return the bitmap of attrs that were set successfully. It is also
4913  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4914  * always be called only after rfs4_do_set_attrs().
4915  *
4916  * Verify that the attributes are same as the expected ones. sargp->vap
4917  * and sargp->sbp contain the input attributes as translated from fattr4.
4918  *
4919  * This function verifies only the attrs that correspond to a vattr or
4920  * vfsstat struct. That is because of the extra step needed to get the
4921  * corresponding system structs. Other attributes have already been set or
4922  * verified by do_rfs4_set_attrs.
4923  *
4924  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4925  */
4926 static int
4927 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4928     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4929 {
4930         int error, ret_error = 0;
4931         int i, k;
4932         uint_t sva_mask = sargp->vap->va_mask;
4933         uint_t vbit;
4934         union nfs4_attr_u *na;
4935         uint8_t *amap;
4936         bool_t getsb = ntovp->vfsstat;
4937 
4938         if (sva_mask != 0) {
4939                 /*
4940                  * Okay to overwrite sargp->vap because we verify based
4941                  * on the incoming values.
4942                  */
4943                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4944                     sargp->cs->cr, NULL);
4945                 if (ret_error) {
4946                         if (resp == NULL)
4947                                 return (ret_error);
4948                         /*
4949                          * Must return bitmap of successful attrs
4950                          */
4951                         sva_mask = 0;   /* to prevent checking vap later */
4952                 } else {
4953                         /*
4954                          * Some file systems clobber va_mask. it is probably
4955                          * wrong of them to do so, nonethless we practice
4956                          * defensive coding.
4957                          * See bug id 4276830.
4958                          */
4959                         sargp->vap->va_mask = sva_mask;
4960                 }
4961         }
4962 
4963         if (getsb) {
4964                 /*
4965                  * Now get the superblock and loop on the bitmap, as there is
4966                  * no simple way of translating from superblock to bitmap4.
4967                  */
4968                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4969                 if (ret_error) {
4970                         if (resp == NULL)
4971                                 goto errout;
4972                         getsb = FALSE;
4973                 }
4974         }
4975 
4976         /*
4977          * Now loop and verify each attribute which getattr returned
4978          * whether it's the same as the input.
4979          */
4980         if (resp == NULL && !getsb && (sva_mask == 0))
4981                 goto errout;
4982 
4983         na = ntovp->na;
4984         amap = ntovp->amap;
4985         k = 0;
4986         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4987                 k = *amap;
4988                 ASSERT(nfs4_ntov_map[k].nval == k);
4989                 vbit = nfs4_ntov_map[k].vbit;
4990 
4991                 /*
4992                  * If vattr attribute but VOP_GETATTR failed, or it's
4993                  * superblock attribute but VFS_STATVFS failed, skip
4994                  */
4995                 if (vbit) {
4996                         if ((vbit & sva_mask) == 0)
4997                                 continue;
4998                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4999                         continue;
5000                 }
5001                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5002                 if (resp != NULL) {
5003                         if (error)
5004                                 ret_error = -1; /* not all match */
5005                         else    /* update response bitmap */
5006                                 *resp |= nfs4_ntov_map[k].fbit;
5007                         continue;
5008                 }
5009                 if (error) {
5010                         ret_error = -1; /* not all match */
5011                         break;
5012                 }
5013         }
5014 errout:
5015         return (ret_error);
5016 }
5017 
5018 /*
5019  * Decode the attribute to be set/verified. If the attr requires a sys op
5020  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5021  * call the sv_getit function for it, because the sys op hasn't yet been done.
5022  * Return 0 for success, error code if failed.
5023  *
5024  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5025  */
5026 static int
5027 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5028     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5029 {
5030         int error = 0;
5031         bool_t set_later;
5032 
5033         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5034 
5035         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5036                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5037                 /*
5038                  * don't verify yet if a vattr or sb dependent attr,
5039                  * because we don't have their sys values yet.
5040                  * Will be done later.
5041                  */
5042                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5043                         /*
5044                          * ACLs are a special case, since setting the MODE
5045                          * conflicts with setting the ACL.  We delay setting
5046                          * the ACL until all other attributes have been set.
5047                          * The ACL gets set in do_rfs4_op_setattr().
5048                          */
5049                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5050                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5051                                     sargp, nap);
5052                                 if (error) {
5053                                         xdr_free(nfs4_ntov_map[k].xfunc,
5054                                             (caddr_t)nap);
5055                                 }
5056                         }
5057                 }
5058         } else {
5059 #ifdef  DEBUG
5060                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5061                     "decoding attribute %d\n", k);
5062 #endif
5063                 error = EINVAL;
5064         }
5065         if (!error && resp_bval && !set_later) {
5066                 *resp_bval |= nfs4_ntov_map[k].fbit;
5067         }
5068 
5069         return (error);
5070 }
5071 
5072 /*
5073  * Set vattr based on incoming fattr4 attrs - used by setattr.
5074  * Set response mask. Ignore any values that are not writable vattr attrs.
5075  */
5076 static nfsstat4
5077 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5078     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5079     nfs4_attr_cmd_t cmd)
5080 {
5081         int error = 0;
5082         int i;
5083         char *attrs = fattrp->attrlist4;
5084         uint32_t attrslen = fattrp->attrlist4_len;
5085         XDR xdr;
5086         nfsstat4 status = NFS4_OK;
5087         vnode_t *vp = cs->vp;
5088         union nfs4_attr_u *na;
5089         uint8_t *amap;
5090 
5091 #ifndef lint
5092         /*
5093          * Make sure that maximum attribute number can be expressed as an
5094          * 8 bit quantity.
5095          */
5096         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5097 #endif
5098 
5099         if (vp == NULL) {
5100                 if (resp)
5101                         *resp = 0;
5102                 return (NFS4ERR_NOFILEHANDLE);
5103         }
5104         if (cs->access == CS_ACCESS_DENIED) {
5105                 if (resp)
5106                         *resp = 0;
5107                 return (NFS4ERR_ACCESS);
5108         }
5109 
5110         sargp->op = cmd;
5111         sargp->cs = cs;
5112         sargp->flag = 0;     /* may be set later */
5113         sargp->vap->va_mask = 0;
5114         sargp->rdattr_error = NFS4_OK;
5115         sargp->rdattr_error_req = FALSE;
5116         /* sargp->sbp is set by the caller */
5117 
5118         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5119 
5120         na = ntovp->na;
5121         amap = ntovp->amap;
5122 
5123         /*
5124          * The following loop iterates on the nfs4_ntov_map checking
5125          * if the fbit is set in the requested bitmap.
5126          * If set then we process the arguments using the
5127          * rfs4_fattr4 conversion functions to populate the setattr
5128          * vattr and va_mask. Any settable attrs that are not using vattr
5129          * will be set in this loop.
5130          */
5131         for (i = 0; i < nfs4_ntov_map_size; i++) {
5132                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5133                         continue;
5134                 }
5135                 /*
5136                  * If setattr, must be a writable attr.
5137                  * If verify/nverify, must be a readable attr.
5138                  */
5139                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5140                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5141                         /*
5142                          * Client tries to set/verify an
5143                          * unsupported attribute, tries to set
5144                          * a read only attr or verify a write
5145                          * only one - error!
5146                          */
5147                         break;
5148                 }
5149                 /*
5150                  * Decode the attribute to set/verify
5151                  */
5152                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5153                     &xdr, resp ? resp : NULL, na);
5154                 if (error)
5155                         break;
5156                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5157                 na++;
5158                 (ntovp->attrcnt)++;
5159                 if (nfs4_ntov_map[i].vfsstat)
5160                         ntovp->vfsstat = TRUE;
5161         }
5162 
5163         if (error != 0)
5164                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5165                     puterrno4(error));
5166         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5167         return (status);
5168 }
5169 
5170 static nfsstat4
5171 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5172     stateid4 *stateid)
5173 {
5174         int error = 0;
5175         struct nfs4_svgetit_arg sarg;
5176         bool_t trunc;
5177 
5178         nfsstat4 status = NFS4_OK;
5179         cred_t *cr = cs->cr;
5180         vnode_t *vp = cs->vp;
5181         struct nfs4_ntov_table ntov;
5182         struct statvfs64 sb;
5183         struct vattr bva;
5184         struct flock64 bf;
5185         int in_crit = 0;
5186         uint_t saved_mask = 0;
5187         caller_context_t ct;
5188 
5189         *resp = 0;
5190         sarg.sbp = &sb;
5191         sarg.is_referral = B_FALSE;
5192         nfs4_ntov_table_init(&ntov);
5193         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5194             NFS4ATTR_SETIT);
5195         if (status != NFS4_OK) {
5196                 /*
5197                  * failed set attrs
5198                  */
5199                 goto done;
5200         }
5201         if ((sarg.vap->va_mask == 0) &&
5202             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5203                 /*
5204                  * no further work to be done
5205                  */
5206                 goto done;
5207         }
5208 
5209         /*
5210          * If we got a request to set the ACL and the MODE, only
5211          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5212          * to change any other bits, along with setting an ACL,
5213          * gives NFS4ERR_INVAL.
5214          */
5215         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5216             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5217                 vattr_t va;
5218 
5219                 va.va_mask = AT_MODE;
5220                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5221                 if (error) {
5222                         status = puterrno4(error);
5223                         goto done;
5224                 }
5225                 if ((sarg.vap->va_mode ^ va.va_mode) &
5226                     ~(VSUID | VSGID | VSVTX)) {
5227                         status = NFS4ERR_INVAL;
5228                         goto done;
5229                 }
5230         }
5231 
5232         /* Check stateid only if size has been set */
5233         if (sarg.vap->va_mask & AT_SIZE) {
5234                 trunc = (sarg.vap->va_size == 0);
5235                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5236                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5237                 if (status != NFS4_OK)
5238                         goto done;
5239         } else {
5240                 ct.cc_sysid = 0;
5241                 ct.cc_pid = 0;
5242                 ct.cc_caller_id = nfs4_srv_caller_id;
5243                 ct.cc_flags = CC_DONTBLOCK;
5244         }
5245 
5246         /* XXX start of possible race with delegations */
5247 
5248         /*
5249          * We need to specially handle size changes because it is
5250          * possible for the client to create a file with read-only
5251          * modes, but with the file opened for writing. If the client
5252          * then tries to set the file size, e.g. ftruncate(3C),
5253          * fcntl(F_FREESP), the normal access checking done in
5254          * VOP_SETATTR would prevent the client from doing it even though
5255          * it should be allowed to do so.  To get around this, we do the
5256          * access checking for ourselves and use VOP_SPACE which doesn't
5257          * do the access checking.
5258          * Also the client should not be allowed to change the file
5259          * size if there is a conflicting non-blocking mandatory lock in
5260          * the region of the change.
5261          */
5262         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5263                 u_offset_t offset;
5264                 ssize_t length;
5265 
5266                 /*
5267                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5268                  * before returning, sarg.vap->va_mask is used to
5269                  * generate the setattr reply bitmap.  We also clear
5270                  * AT_SIZE below before calling VOP_SPACE.  For both
5271                  * of these cases, the va_mask needs to be saved here
5272                  * and restored after calling VOP_SETATTR.
5273                  */
5274                 saved_mask = sarg.vap->va_mask;
5275 
5276                 /*
5277                  * Check any possible conflict due to NBMAND locks.
5278                  * Get into critical region before VOP_GETATTR, so the
5279                  * size attribute is valid when checking conflicts.
5280                  */
5281                 if (nbl_need_check(vp)) {
5282                         nbl_start_crit(vp, RW_READER);
5283                         in_crit = 1;
5284                 }
5285 
5286                 bva.va_mask = AT_UID|AT_SIZE;
5287                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5288                         status = puterrno4(error);
5289                         goto done;
5290                 }
5291 
5292                 if (in_crit) {
5293                         if (sarg.vap->va_size < bva.va_size) {
5294                                 offset = sarg.vap->va_size;
5295                                 length = bva.va_size - sarg.vap->va_size;
5296                         } else {
5297                                 offset = bva.va_size;
5298                                 length = sarg.vap->va_size - bva.va_size;
5299                         }
5300                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5301                             &ct)) {
5302                                 status = NFS4ERR_LOCKED;
5303                                 goto done;
5304                         }
5305                 }
5306 
5307                 if (crgetuid(cr) == bva.va_uid) {
5308                         sarg.vap->va_mask &= ~AT_SIZE;
5309                         bf.l_type = F_WRLCK;
5310                         bf.l_whence = 0;
5311                         bf.l_start = (off64_t)sarg.vap->va_size;
5312                         bf.l_len = 0;
5313                         bf.l_sysid = 0;
5314                         bf.l_pid = 0;
5315                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5316                             (offset_t)sarg.vap->va_size, cr, &ct);
5317                 }
5318         }
5319 
5320         if (!error && sarg.vap->va_mask != 0)
5321                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5322 
5323         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5324         if (saved_mask & AT_SIZE)
5325                 sarg.vap->va_mask |= AT_SIZE;
5326 
5327         /*
5328          * If an ACL was being set, it has been delayed until now,
5329          * in order to set the mode (via the VOP_SETATTR() above) first.
5330          */
5331         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5332                 int i;
5333 
5334                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5335                         if (ntov.amap[i] == FATTR4_ACL)
5336                                 break;
5337                 if (i < NFS4_MAXNUM_ATTRS) {
5338                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5339                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5340                         if (error == 0) {
5341                                 *resp |= FATTR4_ACL_MASK;
5342                         } else if (error == ENOTSUP) {
5343                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5344                                 status = NFS4ERR_ATTRNOTSUPP;
5345                                 goto done;
5346                         }
5347                 } else {
5348                         NFS4_DEBUG(rfs4_debug,
5349                             (CE_NOTE, "do_rfs4_op_setattr: "
5350                             "unable to find ACL in fattr4"));
5351                         error = EINVAL;
5352                 }
5353         }
5354 
5355         if (error) {
5356                 /* check if a monitor detected a delegation conflict */
5357                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5358                         status = NFS4ERR_DELAY;
5359                 else
5360                         status = puterrno4(error);
5361 
5362                 /*
5363                  * Set the response bitmap when setattr failed.
5364                  * If VOP_SETATTR partially succeeded, test by doing a
5365                  * VOP_GETATTR on the object and comparing the data
5366                  * to the setattr arguments.
5367                  */
5368                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5369         } else {
5370                 /*
5371                  * Force modified metadata out to stable storage.
5372                  */
5373                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5374                 /*
5375                  * Set response bitmap
5376                  */
5377                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5378         }
5379 
5380 /* Return early and already have a NFSv4 error */
5381 done:
5382         /*
5383          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5384          * conversion sets both readable and writeable NFS4 attrs
5385          * for AT_MTIME and AT_ATIME.  The line below masks out
5386          * unrequested attrs from the setattr result bitmap.  This
5387          * is placed after the done: label to catch the ATTRNOTSUP
5388          * case.
5389          */
5390         *resp &= fattrp->attrmask;
5391 
5392         if (in_crit)
5393                 nbl_end_crit(vp);
5394 
5395         nfs4_ntov_table_free(&ntov, &sarg);
5396 
5397         return (status);
5398 }
5399 
5400 /* ARGSUSED */
5401 static void
5402 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5403     struct compound_state *cs)
5404 {
5405         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5406         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5407         bslabel_t *clabel;
5408 
5409         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5410             SETATTR4args *, args);
5411 
5412         if (cs->vp == NULL) {
5413                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5414                 goto out;
5415         }
5416 
5417         /*
5418          * If there is an unshared filesystem mounted on this vnode,
5419          * do not allow to setattr on this vnode.
5420          */
5421         if (vn_ismntpt(cs->vp)) {
5422                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5423                 goto out;
5424         }
5425 
5426         resp->attrsset = 0;
5427 
5428         if (rdonly4(req, cs)) {
5429                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5430                 goto out;
5431         }
5432 
5433         /* check label before setting attributes */
5434         if (is_system_labeled()) {
5435                 ASSERT(req->rq_label != NULL);
5436                 clabel = req->rq_label;
5437                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5438                     "got client label from request(1)",
5439                     struct svc_req *, req);
5440                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5441                         if (!do_rfs_label_check(clabel, cs->vp,
5442                             EQUALITY_CHECK, cs->exi)) {
5443                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5444                                 goto out;
5445                         }
5446                 }
5447         }
5448 
5449         *cs->statusp = resp->status =
5450             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5451             &args->stateid);
5452 
5453 out:
5454         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5455             SETATTR4res *, resp);
5456 }
5457 
5458 /* ARGSUSED */
5459 static void
5460 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5461     struct compound_state *cs)
5462 {
5463         /*
5464          * verify and nverify are exactly the same, except that nverify
5465          * succeeds when some argument changed, and verify succeeds when
5466          * when none changed.
5467          */
5468 
5469         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5470         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5471 
5472         int error;
5473         struct nfs4_svgetit_arg sarg;
5474         struct statvfs64 sb;
5475         struct nfs4_ntov_table ntov;
5476 
5477         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5478             VERIFY4args *, args);
5479 
5480         if (cs->vp == NULL) {
5481                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5482                 goto out;
5483         }
5484 
5485         sarg.sbp = &sb;
5486         sarg.is_referral = B_FALSE;
5487         nfs4_ntov_table_init(&ntov);
5488         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5489             &sarg, &ntov, NFS4ATTR_VERIT);
5490         if (resp->status != NFS4_OK) {
5491                 /*
5492                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5493                  * so could return -1 for "no match".
5494                  */
5495                 if (resp->status == -1)
5496                         resp->status = NFS4ERR_NOT_SAME;
5497                 goto done;
5498         }
5499         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5500         switch (error) {
5501         case 0:
5502                 resp->status = NFS4_OK;
5503                 break;
5504         case -1:
5505                 resp->status = NFS4ERR_NOT_SAME;
5506                 break;
5507         default:
5508                 resp->status = puterrno4(error);
5509                 break;
5510         }
5511 done:
5512         *cs->statusp = resp->status;
5513         nfs4_ntov_table_free(&ntov, &sarg);
5514 out:
5515         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5516             VERIFY4res *, resp);
5517 }
5518 
5519 /* ARGSUSED */
5520 static void
5521 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5522     struct compound_state *cs)
5523 {
5524         /*
5525          * verify and nverify are exactly the same, except that nverify
5526          * succeeds when some argument changed, and verify succeeds when
5527          * when none changed.
5528          */
5529 
5530         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5531         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5532 
5533         int error;
5534         struct nfs4_svgetit_arg sarg;
5535         struct statvfs64 sb;
5536         struct nfs4_ntov_table ntov;
5537 
5538         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5539             NVERIFY4args *, args);
5540 
5541         if (cs->vp == NULL) {
5542                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5543                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5544                     NVERIFY4res *, resp);
5545                 return;
5546         }
5547         sarg.sbp = &sb;
5548         sarg.is_referral = B_FALSE;
5549         nfs4_ntov_table_init(&ntov);
5550         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5551             &sarg, &ntov, NFS4ATTR_VERIT);
5552         if (resp->status != NFS4_OK) {
5553                 /*
5554                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5555                  * so could return -1 for "no match".
5556                  */
5557                 if (resp->status == -1)
5558                         resp->status = NFS4_OK;
5559                 goto done;
5560         }
5561         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5562         switch (error) {
5563         case 0:
5564                 resp->status = NFS4ERR_SAME;
5565                 break;
5566         case -1:
5567                 resp->status = NFS4_OK;
5568                 break;
5569         default:
5570                 resp->status = puterrno4(error);
5571                 break;
5572         }
5573 done:
5574         *cs->statusp = resp->status;
5575         nfs4_ntov_table_free(&ntov, &sarg);
5576 
5577         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5578             NVERIFY4res *, resp);
5579 }
5580 
5581 /*
5582  * XXX - This should live in an NFS header file.
5583  */
5584 #define MAX_IOVECS      12
5585 
5586 /* ARGSUSED */
5587 static void
5588 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5589     struct compound_state *cs)
5590 {
5591         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5592         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5593         int error;
5594         vnode_t *vp;
5595         struct vattr bva;
5596         u_offset_t rlimit;
5597         struct uio uio;
5598         struct iovec iov[MAX_IOVECS];
5599         struct iovec *iovp;
5600         int iovcnt;
5601         int ioflag;
5602         cred_t *savecred, *cr;
5603         bool_t *deleg = &cs->deleg;
5604         nfsstat4 stat;
5605         int in_crit = 0;
5606         caller_context_t ct;
5607         nfs4_srv_t *nsrv4;
5608 
5609         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5610             WRITE4args *, args);
5611 
5612         vp = cs->vp;
5613         if (vp == NULL) {
5614                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5615                 goto out;
5616         }
5617         if (cs->access == CS_ACCESS_DENIED) {
5618                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5619                 goto out;
5620         }
5621 
5622         cr = cs->cr;
5623 
5624         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5625             deleg, TRUE, &ct)) != NFS4_OK) {
5626                 *cs->statusp = resp->status = stat;
5627                 goto out;
5628         }
5629 
5630         /*
5631          * We have to enter the critical region before calling VOP_RWLOCK
5632          * to avoid a deadlock with ufs.
5633          */
5634         if (nbl_need_check(vp)) {
5635                 nbl_start_crit(vp, RW_READER);
5636                 in_crit = 1;
5637                 if (nbl_conflict(vp, NBL_WRITE,
5638                     args->offset, args->data_len, 0, &ct)) {
5639                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5640                         goto out;
5641                 }
5642         }
5643 
5644         bva.va_mask = AT_MODE | AT_UID;
5645         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5646 
5647         /*
5648          * If we can't get the attributes, then we can't do the
5649          * right access checking.  So, we'll fail the request.
5650          */
5651         if (error) {
5652                 *cs->statusp = resp->status = puterrno4(error);
5653                 goto out;
5654         }
5655 
5656         if (rdonly4(req, cs)) {
5657                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5658                 goto out;
5659         }
5660 
5661         if (vp->v_type != VREG) {
5662                 *cs->statusp = resp->status =
5663                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5664                 goto out;
5665         }
5666 
5667         if (crgetuid(cr) != bva.va_uid &&
5668             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5669                 *cs->statusp = resp->status = puterrno4(error);
5670                 goto out;
5671         }
5672 
5673         if (MANDLOCK(vp, bva.va_mode)) {
5674                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5675                 goto out;
5676         }
5677 
5678         nsrv4 = nfs4_get_srv();
5679         if (args->data_len == 0) {
5680                 *cs->statusp = resp->status = NFS4_OK;
5681                 resp->count = 0;
5682                 resp->committed = args->stable;
5683                 resp->writeverf = nsrv4->write4verf;
5684                 goto out;
5685         }
5686 
5687         if (args->mblk != NULL) {
5688                 mblk_t *m;
5689                 uint_t bytes, round_len;
5690 
5691                 iovcnt = 0;
5692                 bytes = 0;
5693                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5694                 for (m = args->mblk;
5695                     m != NULL && bytes < round_len;
5696                     m = m->b_cont) {
5697                         iovcnt++;
5698                         bytes += MBLKL(m);
5699                 }
5700 #ifdef DEBUG
5701                 /* should have ended on an mblk boundary */
5702                 if (bytes != round_len) {
5703                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5704                             bytes, round_len, args->data_len);
5705                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5706                             (void *)args->mblk, (void *)m);
5707                         ASSERT(bytes == round_len);
5708                 }
5709 #endif
5710                 if (iovcnt <= MAX_IOVECS) {
5711                         iovp = iov;
5712                 } else {
5713                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5714                 }
5715                 mblk_to_iov(args->mblk, iovcnt, iovp);
5716         } else if (args->rlist != NULL) {
5717                 iovcnt = 1;
5718                 iovp = iov;
5719                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5720                 iovp->iov_len = args->data_len;
5721         } else {
5722                 iovcnt = 1;
5723                 iovp = iov;
5724                 iovp->iov_base = args->data_val;
5725                 iovp->iov_len = args->data_len;
5726         }
5727 
5728         uio.uio_iov = iovp;
5729         uio.uio_iovcnt = iovcnt;
5730 
5731         uio.uio_segflg = UIO_SYSSPACE;
5732         uio.uio_extflg = UIO_COPY_DEFAULT;
5733         uio.uio_loffset = args->offset;
5734         uio.uio_resid = args->data_len;
5735         uio.uio_llimit = curproc->p_fsz_ctl;
5736         rlimit = uio.uio_llimit - args->offset;
5737         if (rlimit < (u_offset_t)uio.uio_resid)
5738                 uio.uio_resid = (int)rlimit;
5739 
5740         if (args->stable == UNSTABLE4)
5741                 ioflag = 0;
5742         else if (args->stable == FILE_SYNC4)
5743                 ioflag = FSYNC;
5744         else if (args->stable == DATA_SYNC4)
5745                 ioflag = FDSYNC;
5746         else {
5747                 if (iovp != iov)
5748                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5749                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5750                 goto out;
5751         }
5752 
5753         /*
5754          * We're changing creds because VM may fault and we need
5755          * the cred of the current thread to be used if quota
5756          * checking is enabled.
5757          */
5758         savecred = curthread->t_cred;
5759         curthread->t_cred = cr;
5760         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5761         curthread->t_cred = savecred;
5762 
5763         if (iovp != iov)
5764                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5765 
5766         if (error) {
5767                 *cs->statusp = resp->status = puterrno4(error);
5768                 goto out;
5769         }
5770 
5771         *cs->statusp = resp->status = NFS4_OK;
5772         resp->count = args->data_len - uio.uio_resid;
5773 
5774         if (ioflag == 0)
5775                 resp->committed = UNSTABLE4;
5776         else
5777                 resp->committed = FILE_SYNC4;
5778 
5779         resp->writeverf = nsrv4->write4verf;
5780 
5781 out:
5782         if (in_crit)
5783                 nbl_end_crit(vp);
5784 
5785         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5786             WRITE4res *, resp);
5787 }
5788 
5789 
5790 /* XXX put in a header file */
5791 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5792 
/*
 * Execute an NFSv4 COMPOUND request.
 *
 *   args - decoded request: tag, minorversion and the array of operations.
 *   resp - reply to fill in: a copy of the request tag, the overall status
 *          and one result slot for every operation that was executed.
 *   exi  - asserted to be NULL once the early-return checks have passed.
 *   req  - RPC request; handed to every op handler and used to obtain the
 *          caller's credentials.
 *   cr   - asserted to be NULL; a cred is allocated here with crget().
 *   rv   - optional.  When non-NULL it is set to 0 on entry and to 1 if
 *          sec_svc_getcred() fails, in which case svcerr_badcred() has
 *          already been called on the transport.
 *
 * Operations are dispatched in order through rfsv4disptab while
 * ne->exported_lock is held as reader.  Processing stops at the first
 * operation that leaves *cs.statusp different from NFS4_OK, and the results
 * array is then shrunk to the operations actually executed.
 *
 * Before returning from the main path, the vnodes, saved filehandle and
 * creds held in the compound state are released, and req->rq_label is freed
 * and cleared.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
        uint_t i;
        struct compound_state cs;
        nfs4_srv_t *nsrv4;
        nfs_export_t *ne = nfs_get_export();

        if (rv != NULL)
                *rv = 0;
        rfs4_init_compound_state(&cs);
        /*
         * Form a reply tag by copying over the request tag.
         */
        resp->tag.utf8string_len = args->tag.utf8string_len;
        if (args->tag.utf8string_len != 0) {
                resp->tag.utf8string_val =
                    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
                bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
                    resp->tag.utf8string_len);
        } else {
                resp->tag.utf8string_val = NULL;
        }

        /* Op handlers report their status through cs.statusp. */
        cs.statusp = &resp->status;
        cs.req = req;
        resp->array = NULL;
        resp->array_len = 0;

        /*
         * XXX for now, minorversion should be zero
         */
        if (args->minorversion != NFS4_MINORVERSION) {
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                return;
        }

        /*
         * An empty compound succeeds trivially; note that this path
         * returns without firing the compound start/done probes.
         */
        if (args->array_len == 0) {
                resp->status = NFS4_OK;
                return;
        }

        ASSERT(exi == NULL);
        ASSERT(cr == NULL);

        cr = crget();
        ASSERT(cr != NULL);

        /*
         * A zero return from sec_svc_getcred() is treated as a credential
         * failure: drop the cred allocated above, reject the request on
         * the transport and tell the caller through *rv.
         */
        if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                crfree(cr);
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                svcerr_badcred(req->rq_xprt);
                if (rv != NULL)
                        *rv = 1;
                return;
        }
        /* One zeroed result slot per requested operation. */
        resp->array_len = args->array_len;
        resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
            KM_SLEEP);

        /* cs.basecr now owns the cred; it is freed at the end. */
        cs.basecr = cr;
        nsrv4 = nfs4_get_srv();

        DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
            COMPOUND4args *, args);

        /*
         * For now, NFS4 compound processing must be protected by
         * exported_lock because it can access more than one exportinfo
         * per compound and share/unshare can now change multiple
         * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
         * per proc (excluding public exinfo), and exi_count design
         * is sufficient to protect concurrent execution of NFS2/3
         * ops along with unexport.  This lock will be removed as
         * part of the NFSv4 phase 2 namespace redesign work.
         */
        rw_enter(&ne->exported_lock, RW_READER);

        /*
         * If this is the first compound we've seen, we need to start all
         * new instances' grace periods.
         */
        if (nsrv4->seen_first_compound == 0) {
                rfs4_grace_start_new(nsrv4);
                /*
                 * This must be set after rfs4_grace_start_new(), otherwise
                 * another thread could proceed past here before the former
                 * is finished.
                 */
                nsrv4->seen_first_compound = 1;
        }

        /* Run the ops in order until one fails (cs.cont is cleared). */
        for (i = 0; i < args->array_len && cs.cont; i++) {
                nfs_argop4 *argop;
                nfs_resop4 *resop;
                uint_t op;
                kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];

                argop = &args->array[i];
                resop = &resp->array[i];
                /* The result slot carries the same opcode as the request. */
                resop->resop = argop->argop;
                op = (uint_t)resop->resop;

                if (op < rfsv4disp_cnt) {
                        /*
                         * Count the individual ops here; NULL and COMPOUND
                         * are counted in common_dispatch()
                         */
                        stat[op].value.ui64++;

                        NFS4_DEBUG(rfs4_debug > 1,
                            (CE_NOTE, "Executing %s", rfs4_op_string[op]));
                        (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
                        NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
                            rfs4_op_string[op], *cs.statusp));
                        if (*cs.statusp != NFS4_OK)
                                cs.cont = FALSE;
                } else {
                        /*
                         * This is effectively dead code since XDR code
                         * will have already returned BADXDR if op doesn't
                         * decode to a legal value.  It is kept only for the
                         * day when XDR code doesn't verify v4 opcodes.
                         */
                        op = OP_ILLEGAL;
                        stat[OP_ILLEGAL_IDX].value.ui64++;

                        rfs4_op_illegal(argop, resop, req, &cs);
                        cs.cont = FALSE;
                }

                /*
                 * If not at last op, and if we are to stop, then
                 * compact the results array.  The old array was allocated
                 * with args->array_len entries, so it is freed with that
                 * size; the new one holds just the i + 1 executed ops.
                 */
                if ((i + 1) < args->array_len && !cs.cont) {
                        nfs_resop4 *new_res = kmem_alloc(
                            (i+1) * sizeof (nfs_resop4), KM_SLEEP);
                        bcopy(resp->array,
                            new_res, (i+1) * sizeof (nfs_resop4));
                        kmem_free(resp->array,
                            args->array_len * sizeof (nfs_resop4));

                        resp->array_len =  i + 1;
                        resp->array = new_res;
                }
        }

        rw_exit(&ne->exported_lock);

        /*
         * clear exportinfo and vnode fields from compound_state before dtrace
         * probe, to avoid tracing residual values for path and share path.
         */
        if (cs.vp)
                VN_RELE(cs.vp);
        if (cs.saved_vp)
                VN_RELE(cs.saved_vp);
        cs.exi = cs.saved_exi = NULL;
        cs.vp = cs.saved_vp = NULL;

        DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
            COMPOUND4res *, resp);

        /* The saved filehandle buffer is freed as NFS4_FHSIZE bytes. */
        if (cs.saved_fh.nfs_fh4_val)
                kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

        if (cs.basecr)
                crfree(cs.basecr);
        if (cs.cr)
                crfree(cs.cr);
        /*
         * done with this compound request, free the label
         */

        if (req->rq_label != NULL) {
                kmem_free(req->rq_label, sizeof (bslabel_t));
                req->rq_label = NULL;
        }
}
5981 
5982 /*
5983  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5984  * XXX zero out the tag and array values. Need to investigate why the
5985  * XXX calls occur, but at least prevent the panic for now.
5986  */
5987 void
5988 rfs4_compound_free(COMPOUND4res *resp)
5989 {
5990         uint_t i;
5991 
5992         if (resp->tag.utf8string_val) {
5993                 UTF8STRING_FREE(resp->tag)
5994         }
5995 
5996         for (i = 0; i < resp->array_len; i++) {
5997                 nfs_resop4 *resop;
5998                 uint_t op;
5999 
6000                 resop = &resp->array[i];
6001                 op = (uint_t)resop->resop;
6002                 if (op < rfsv4disp_cnt) {
6003                         (*rfsv4disptab[op].dis_resfree)(resop);
6004                 }
6005         }
6006         if (resp->array != NULL) {
6007                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6008         }
6009 }
6010 
6011 /*
6012  * Process the value of the compound request rpc flags, as a bit-AND
6013  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6014  */
6015 void
6016 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6017 {
6018         int i;
6019         int flag = RPC_ALL;
6020 
6021         for (i = 0; flag && i < args->array_len; i++) {
6022                 uint_t op;
6023 
6024                 op = (uint_t)args->array[i].argop;
6025 
6026                 if (op < rfsv4disp_cnt)
6027                         flag &= rfsv4disptab[op].dis_flags;
6028                 else
6029                         flag = 0;
6030         }
6031         *flagp = flag;
6032 }
6033 
6034 nfsstat4
6035 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6036 {
6037         nfsstat4 e;
6038 
6039         rfs4_dbe_lock(cp->rc_dbe);
6040 
6041         if (cp->rc_sysidt != LM_NOSYSID) {
6042                 *sp = cp->rc_sysidt;
6043                 e = NFS4_OK;
6044 
6045         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6046                 *sp = cp->rc_sysidt;
6047                 e = NFS4_OK;
6048 
6049                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6050                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6051         } else
6052                 e = NFS4ERR_DELAY;
6053 
6054         rfs4_dbe_unlock(cp->rc_dbe);
6055         return (e);
6056 }
6057 
6058 #if defined(DEBUG) && ! defined(lint)
6059 static void lock_print(char *str, int operation, struct flock64 *flk)
6060 {
6061         char *op, *type;
6062 
6063         switch (operation) {
6064         case F_GETLK: op = "F_GETLK";
6065                 break;
6066         case F_SETLK: op = "F_SETLK";
6067                 break;
6068         case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
6069                 break;
6070         default: op = "F_UNKNOWN";
6071                 break;
6072         }
6073         switch (flk->l_type) {
6074         case F_UNLCK: type = "F_UNLCK";
6075                 break;
6076         case F_RDLCK: type = "F_RDLCK";
6077                 break;
6078         case F_WRLCK: type = "F_WRLCK";
6079                 break;
6080         default: type = "F_UNKNOWN";
6081                 break;
6082         }
6083 
6084         ASSERT(flk->l_whence == 0);
6085         cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
6086             str, op, type, (longlong_t)flk->l_start,
6087             flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
6088 }
6089 
6090 #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
6091 #else
6092 #define LOCK_PRINT(d, s, t, f)
6093 #endif
6094 
/*
 * Credential check hook.  This implementation accepts everything: it
 * always returns TRUE and does not look at cr_set, req or cs.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
        return (TRUE);
}
6101 
6102 /*
6103  * Look up the pathname using the vp in cs as the directory vnode.
6104  * cs->vp will be the vnode for the file on success
6105  */
6106 
6107 static nfsstat4
6108 rfs4_lookup(component4 *component, struct svc_req *req,
6109     struct compound_state *cs)
6110 {
6111         char *nm;
6112         uint32_t len;
6113         nfsstat4 status;
6114         struct sockaddr *ca;
6115         char *name;
6116 
6117         if (cs->vp == NULL) {
6118                 return (NFS4ERR_NOFILEHANDLE);
6119         }
6120         if (cs->vp->v_type != VDIR) {
6121                 return (NFS4ERR_NOTDIR);
6122         }
6123 
6124         status = utf8_dir_verify(component);
6125         if (status != NFS4_OK)
6126                 return (status);
6127 
6128         nm = utf8_to_fn(component, &len, NULL);
6129         if (nm == NULL) {
6130                 return (NFS4ERR_INVAL);
6131         }
6132 
6133         if (len > MAXNAMELEN) {
6134                 kmem_free(nm, len);
6135                 return (NFS4ERR_NAMETOOLONG);
6136         }
6137 
6138         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6139         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6140             MAXPATHLEN + 1);
6141 
6142         if (name == NULL) {
6143                 kmem_free(nm, len);
6144                 return (NFS4ERR_INVAL);
6145         }
6146 
6147         status = do_rfs4_op_lookup(name, req, cs);
6148 
6149         if (name != nm)
6150                 kmem_free(name, MAXPATHLEN + 1);
6151 
6152         kmem_free(nm, len);
6153 
6154         return (status);
6155 }
6156 
6157 static nfsstat4
6158 rfs4_lookupfile(component4 *component, struct svc_req *req,
6159     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6160 {
6161         nfsstat4 status;
6162         vnode_t *dvp = cs->vp;
6163         vattr_t bva, ava, fva;
6164         int error;
6165 
6166         /* Get "before" change value */
6167         bva.va_mask = AT_CTIME|AT_SEQ;
6168         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6169         if (error)
6170                 return (puterrno4(error));
6171 
6172         /* rfs4_lookup may VN_RELE directory */
6173         VN_HOLD(dvp);
6174 
6175         status = rfs4_lookup(component, req, cs);
6176         if (status != NFS4_OK) {
6177                 VN_RELE(dvp);
6178                 return (status);
6179         }
6180 
6181         /*
6182          * Get "after" change value, if it fails, simply return the
6183          * before value.
6184          */
6185         ava.va_mask = AT_CTIME|AT_SEQ;
6186         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6187                 ava.va_ctime = bva.va_ctime;
6188                 ava.va_seq = 0;
6189         }
6190         VN_RELE(dvp);
6191 
6192         /*
6193          * Validate the file is a file
6194          */
6195         fva.va_mask = AT_TYPE|AT_MODE;
6196         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6197         if (error)
6198                 return (puterrno4(error));
6199 
6200         if (fva.va_type != VREG) {
6201                 if (fva.va_type == VDIR)
6202                         return (NFS4ERR_ISDIR);
6203                 if (fva.va_type == VLNK)
6204                         return (NFS4ERR_SYMLINK);
6205                 return (NFS4ERR_INVAL);
6206         }
6207 
6208         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6209         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6210 
6211         /*
6212          * It is undefined if VOP_LOOKUP will change va_seq, so
6213          * cinfo.atomic = TRUE only if we have
6214          * non-zero va_seq's, and they have not changed.
6215          */
6216         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6217                 cinfo->atomic = TRUE;
6218         else
6219                 cinfo->atomic = FALSE;
6220 
6221         /* Check for mandatory locking */
6222         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6223         return (check_open_access(access, cs, req));
6224 }
6225 
6226 static nfsstat4
6227 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6228     cred_t *cr, vnode_t **vpp, bool_t *created)
6229 {
6230         int error;
6231         nfsstat4 status = NFS4_OK;
6232         vattr_t va;
6233 
6234 tryagain:
6235 
6236         /*
6237          * The file open mode used is VWRITE.  If the client needs
6238          * some other semantic, then it should do the access checking
6239          * itself.  It would have been nice to have the file open mode
6240          * passed as part of the arguments.
6241          */
6242 
6243         *created = TRUE;
6244         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6245 
6246         if (error) {
6247                 *created = FALSE;
6248 
6249                 /*
6250                  * If we got something other than file already exists
6251                  * then just return this error.  Otherwise, we got
6252                  * EEXIST.  If we were doing a GUARDED create, then
6253                  * just return this error.  Otherwise, we need to
6254                  * make sure that this wasn't a duplicate of an
6255                  * exclusive create request.
6256                  *
6257                  * The assumption is made that a non-exclusive create
6258                  * request will never return EEXIST.
6259                  */
6260 
6261                 if (error != EEXIST || mode == GUARDED4) {
6262                         status = puterrno4(error);
6263                         return (status);
6264                 }
6265                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6266                     NULL, NULL, NULL);
6267 
6268                 if (error) {
6269                         /*
6270                          * We couldn't find the file that we thought that
6271                          * we just created.  So, we'll just try creating
6272                          * it again.
6273                          */
6274                         if (error == ENOENT)
6275                                 goto tryagain;
6276 
6277                         status = puterrno4(error);
6278                         return (status);
6279                 }
6280 
6281                 if (mode == UNCHECKED4) {
6282                         /* existing object must be regular file */
6283                         if ((*vpp)->v_type != VREG) {
6284                                 if ((*vpp)->v_type == VDIR)
6285                                         status = NFS4ERR_ISDIR;
6286                                 else if ((*vpp)->v_type == VLNK)
6287                                         status = NFS4ERR_SYMLINK;
6288                                 else
6289                                         status = NFS4ERR_INVAL;
6290                                 VN_RELE(*vpp);
6291                                 return (status);
6292                         }
6293 
6294                         return (NFS4_OK);
6295                 }
6296 
6297                 /* Check for duplicate request */
6298                 va.va_mask = AT_MTIME;
6299                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6300                 if (!error) {
6301                         /* We found the file */
6302                         const timestruc_t *mtime = &vap->va_mtime;
6303 
6304                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6305                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6306                                 /* but its not our creation */
6307                                 VN_RELE(*vpp);
6308                                 return (NFS4ERR_EXIST);
6309                         }
6310                         *created = TRUE; /* retrans of create == created */
6311                         return (NFS4_OK);
6312                 }
6313                 VN_RELE(*vpp);
6314                 return (NFS4ERR_EXIST);
6315         }
6316 
6317         return (NFS4_OK);
6318 }
6319 
6320 static nfsstat4
6321 check_open_access(uint32_t access, struct compound_state *cs,
6322     struct svc_req *req)
6323 {
6324         int error;
6325         vnode_t *vp;
6326         bool_t readonly;
6327         cred_t *cr = cs->cr;
6328 
6329         /* For now we don't allow mandatory locking as per V2/V3 */
6330         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6331                 return (NFS4ERR_ACCESS);
6332         }
6333 
6334         vp = cs->vp;
6335         ASSERT(cr != NULL && vp->v_type == VREG);
6336 
6337         /*
6338          * If the file system is exported read only and we are trying
6339          * to open for write, then return NFS4ERR_ROFS
6340          */
6341 
6342         readonly = rdonly4(req, cs);
6343 
6344         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6345                 return (NFS4ERR_ROFS);
6346 
6347         if (access & OPEN4_SHARE_ACCESS_READ) {
6348                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6349                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6350                         return (NFS4ERR_ACCESS);
6351                 }
6352         }
6353 
6354         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6355                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6356                 if (error)
6357                         return (NFS4ERR_ACCESS);
6358         }
6359 
6360         return (NFS4_OK);
6361 }
6362 
6363 static nfsstat4
6364 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6365     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6366 {
6367         struct nfs4_svgetit_arg sarg;
6368         struct nfs4_ntov_table ntov;
6369 
6370         bool_t ntov_table_init = FALSE;
6371         struct statvfs64 sb;
6372         nfsstat4 status;
6373         vnode_t *vp;
6374         vattr_t bva, ava, iva, cva, *vap;
6375         vnode_t *dvp;
6376         timespec32_t *mtime;
6377         char *nm = NULL;
6378         uint_t buflen;
6379         bool_t created;
6380         bool_t setsize = FALSE;
6381         len_t reqsize;
6382         int error;
6383         bool_t trunc;
6384         caller_context_t ct;
6385         component4 *component;
6386         bslabel_t *clabel;
6387         struct sockaddr *ca;
6388         char *name = NULL;
6389 
6390         sarg.sbp = &sb;
6391         sarg.is_referral = B_FALSE;
6392 
6393         dvp = cs->vp;
6394 
6395         /* Check if the file system is read only */
6396         if (rdonly4(req, cs))
6397                 return (NFS4ERR_ROFS);
6398 
6399         /* check the label of including directory */
6400         if (is_system_labeled()) {
6401                 ASSERT(req->rq_label != NULL);
6402                 clabel = req->rq_label;
6403                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6404                     "got client label from request(1)",
6405                     struct svc_req *, req);
6406                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6407                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6408                             cs->exi)) {
6409                                 return (NFS4ERR_ACCESS);
6410                         }
6411                 }
6412         }
6413 
6414         /*
6415          * Get the last component of path name in nm. cs will reference
6416          * the including directory on success.
6417          */
6418         component = &args->open_claim4_u.file;
6419         status = utf8_dir_verify(component);
6420         if (status != NFS4_OK)
6421                 return (status);
6422 
6423         nm = utf8_to_fn(component, &buflen, NULL);
6424 
6425         if (nm == NULL)
6426                 return (NFS4ERR_RESOURCE);
6427 
6428         if (buflen > MAXNAMELEN) {
6429                 kmem_free(nm, buflen);
6430                 return (NFS4ERR_NAMETOOLONG);
6431         }
6432 
6433         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6434         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6435         if (error) {
6436                 kmem_free(nm, buflen);
6437                 return (puterrno4(error));
6438         }
6439 
6440         if (bva.va_type != VDIR) {
6441                 kmem_free(nm, buflen);
6442                 return (NFS4ERR_NOTDIR);
6443         }
6444 
6445         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6446 
6447         switch (args->mode) {
6448         case GUARDED4:
6449                 /*FALLTHROUGH*/
6450         case UNCHECKED4:
6451                 nfs4_ntov_table_init(&ntov);
6452                 ntov_table_init = TRUE;
6453 
6454                 *attrset = 0;
6455                 status = do_rfs4_set_attrs(attrset,
6456                     &args->createhow4_u.createattrs,
6457                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6458 
6459                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6460                     sarg.vap->va_type != VREG) {
6461                         if (sarg.vap->va_type == VDIR)
6462                                 status = NFS4ERR_ISDIR;
6463                         else if (sarg.vap->va_type == VLNK)
6464                                 status = NFS4ERR_SYMLINK;
6465                         else
6466                                 status = NFS4ERR_INVAL;
6467                 }
6468 
6469                 if (status != NFS4_OK) {
6470                         kmem_free(nm, buflen);
6471                         nfs4_ntov_table_free(&ntov, &sarg);
6472                         *attrset = 0;
6473                         return (status);
6474                 }
6475 
6476                 vap = sarg.vap;
6477                 vap->va_type = VREG;
6478                 vap->va_mask |= AT_TYPE;
6479 
6480                 if ((vap->va_mask & AT_MODE) == 0) {
6481                         vap->va_mask |= AT_MODE;
6482                         vap->va_mode = (mode_t)0600;
6483                 }
6484 
6485                 if (vap->va_mask & AT_SIZE) {
6486 
6487                         /* Disallow create with a non-zero size */
6488 
6489                         if ((reqsize = sarg.vap->va_size) != 0) {
6490                                 kmem_free(nm, buflen);
6491                                 nfs4_ntov_table_free(&ntov, &sarg);
6492                                 *attrset = 0;
6493                                 return (NFS4ERR_INVAL);
6494                         }
6495                         setsize = TRUE;
6496                 }
6497                 break;
6498 
6499         case EXCLUSIVE4:
6500                 /* prohibit EXCL create of named attributes */
6501                 if (dvp->v_flag & V_XATTRDIR) {
6502                         kmem_free(nm, buflen);
6503                         *attrset = 0;
6504                         return (NFS4ERR_INVAL);
6505                 }
6506 
6507                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6508                 cva.va_type = VREG;
6509                 /*
6510                  * Ensure no time overflows. Assumes underlying
6511                  * filesystem supports at least 32 bits.
6512                  * Truncate nsec to usec resolution to allow valid
6513                  * compares even if the underlying filesystem truncates.
6514                  */
6515                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6516                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6517                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6518                 cva.va_mode = (mode_t)0;
6519                 vap = &cva;
6520 
6521                 /*
6522                  * For EXCL create, attrset is set to the server attr
6523                  * used to cache the client's verifier.
6524                  */
6525                 *attrset = FATTR4_TIME_MODIFY_MASK;
6526                 break;
6527         }
6528 
6529         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6530         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6531             MAXPATHLEN  + 1);
6532 
6533         if (name == NULL) {
6534                 kmem_free(nm, buflen);
6535                 return (NFS4ERR_SERVERFAULT);
6536         }
6537 
6538         status = create_vnode(dvp, name, vap, args->mode,
6539             cs->cr, &vp, &created);
6540         if (nm != name)
6541                 kmem_free(name, MAXPATHLEN + 1);
6542         kmem_free(nm, buflen);
6543 
6544         if (status != NFS4_OK) {
6545                 if (ntov_table_init)
6546                         nfs4_ntov_table_free(&ntov, &sarg);
6547                 *attrset = 0;
6548                 return (status);
6549         }
6550 
6551         trunc = (setsize && !created);
6552 
6553         if (args->mode != EXCLUSIVE4) {
6554                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6555 
6556                 /*
6557                  * True verification that object was created with correct
6558                  * attrs is impossible.  The attrs could have been changed
6559                  * immediately after object creation.  If attributes did
6560                  * not verify, the only recourse for the server is to
6561                  * destroy the object.  Maybe if some attrs (like gid)
6562                  * are set incorrectly, the object should be destroyed;
6563                  * however, seems bad as a default policy.  Do we really
6564                  * want to destroy an object over one of the times not
6565                  * verifying correctly?  For these reasons, the server
6566                  * currently sets bits in attrset for createattrs
6567                  * that were set; however, no verification is done.
6568                  *
6569                  * vmask_to_nmask accounts for vattr bits set on create
6570                  *      [do_rfs4_set_attrs() only sets resp bits for
6571                  *       non-vattr/vfs bits.]
6572                  * Mask off any bits we set by default so as not to return
6573                  * more attrset bits than were requested in createattrs
6574                  */
6575                 if (created) {
6576                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6577                         *attrset &= createmask;
6578                 } else {
6579                         /*
6580                          * We did not create the vnode (we tried but it
6581                          * already existed).  In this case, the only createattr
6582                          * that the spec allows the server to set is size,
6583                          * and even then, it can only be set if it is 0.
6584                          */
6585                         *attrset = 0;
6586                         if (trunc)
6587                                 *attrset = FATTR4_SIZE_MASK;
6588                 }
6589         }
6590         if (ntov_table_init)
6591                 nfs4_ntov_table_free(&ntov, &sarg);
6592 
6593         /*
6594          * Get the initial "after" sequence number, if it fails,
6595          * set to zero, time to before.
6596          */
6597         iva.va_mask = AT_CTIME|AT_SEQ;
6598         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6599                 iva.va_seq = 0;
6600                 iva.va_ctime = bva.va_ctime;
6601         }
6602 
6603         /*
6604          * create_vnode attempts to create the file exclusive,
6605          * if it already exists the VOP_CREATE will fail and
6606          * may not increase va_seq. It is atomic if
6607          * we haven't changed the directory, but if it has changed
6608          * we don't know what changed it.
6609          */
6610         if (!created) {
6611                 if (bva.va_seq && iva.va_seq &&
6612                     bva.va_seq == iva.va_seq)
6613                         cinfo->atomic = TRUE;
6614                 else
6615                         cinfo->atomic = FALSE;
6616                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6617         } else {
6618                 /*
6619                  * The entry was created, we need to sync the
6620                  * directory metadata.
6621                  */
6622                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6623 
6624                 /*
6625                  * Get "after" change value, if it fails, simply return the
6626                  * before value.
6627                  */
6628                 ava.va_mask = AT_CTIME|AT_SEQ;
6629                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6630                         ava.va_ctime = bva.va_ctime;
6631                         ava.va_seq = 0;
6632                 }
6633 
6634                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6635 
6636                 /*
6637                  * The cinfo->atomic = TRUE only if we have
6638                  * non-zero va_seq's, and it has incremented by exactly one
6639                  * during the create_vnode and it didn't
6640                  * change during the VOP_FSYNC.
6641                  */
6642                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6643                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6644                         cinfo->atomic = TRUE;
6645                 else
6646                         cinfo->atomic = FALSE;
6647         }
6648 
6649         /* Check for mandatory locking and that the size gets set. */
6650         cva.va_mask = AT_MODE;
6651         if (setsize)
6652                 cva.va_mask |= AT_SIZE;
6653 
6654         /* Assume the worst */
6655         cs->mandlock = TRUE;
6656 
6657         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6658                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6659 
6660                 /*
6661                  * Truncate the file if necessary; this would be
6662                  * the case for create over an existing file.
6663                  */
6664 
6665                 if (trunc) {
6666                         int in_crit = 0;
6667                         rfs4_file_t *fp;
6668                         nfs4_srv_t *nsrv4;
6669                         bool_t create = FALSE;
6670 
6671                         /*
6672                          * We are writing over an existing file.
6673                          * Check to see if we need to recall a delegation.
6674                          */
6675                         nsrv4 = nfs4_get_srv();
6676                         rfs4_hold_deleg_policy(nsrv4);
6677                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6678                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6679                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6680                                         rfs4_file_rele(fp);
6681                                         rfs4_rele_deleg_policy(nsrv4);
6682                                         VN_RELE(vp);
6683                                         *attrset = 0;
6684                                         return (NFS4ERR_DELAY);
6685                                 }
6686                                 rfs4_file_rele(fp);
6687                         }
6688                         rfs4_rele_deleg_policy(nsrv4);
6689 
6690                         if (nbl_need_check(vp)) {
6691                                 in_crit = 1;
6692 
6693                                 ASSERT(reqsize == 0);
6694 
6695                                 nbl_start_crit(vp, RW_READER);
6696                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6697                                     cva.va_size, 0, NULL)) {
6698                                         in_crit = 0;
6699                                         nbl_end_crit(vp);
6700                                         VN_RELE(vp);
6701                                         *attrset = 0;
6702                                         return (NFS4ERR_ACCESS);
6703                                 }
6704                         }
6705                         ct.cc_sysid = 0;
6706                         ct.cc_pid = 0;
6707                         ct.cc_caller_id = nfs4_srv_caller_id;
6708                         ct.cc_flags = CC_DONTBLOCK;
6709 
6710                         cva.va_mask = AT_SIZE;
6711                         cva.va_size = reqsize;
6712                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6713                         if (in_crit)
6714                                 nbl_end_crit(vp);
6715                 }
6716         }
6717 
6718         error = makefh4(&cs->fh, vp, cs->exi);
6719 
6720         /*
6721          * Force modified data and metadata out to stable storage.
6722          */
6723         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6724 
6725         if (error) {
6726                 VN_RELE(vp);
6727                 *attrset = 0;
6728                 return (puterrno4(error));
6729         }
6730 
6731         /* if parent dir is attrdir, set namedattr fh flag */
6732         if (dvp->v_flag & V_XATTRDIR)
6733                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6734 
6735         if (cs->vp)
6736                 VN_RELE(cs->vp);
6737 
6738         cs->vp = vp;
6739 
6740         /*
6741          * if we did not create the file, we will need to check
6742          * the access bits on the file
6743          */
6744 
6745         if (!created) {
6746                 if (setsize)
6747                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6748                 status = check_open_access(args->share_access, cs, req);
6749                 if (status != NFS4_OK)
6750                         *attrset = 0;
6751         }
6752         return (status);
6753 }
6754 
6755 /*ARGSUSED*/
6756 static void
6757 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6758     rfs4_openowner_t *oo, delegreq_t deleg,
6759     uint32_t access, uint32_t deny,
6760     OPEN4res *resp, int deleg_cur)
6761 {
6762         /* XXX Currently not using req  */
6763         rfs4_state_t *sp;
6764         rfs4_file_t *fp;
6765         bool_t screate = TRUE;
6766         bool_t fcreate = TRUE;
6767         uint32_t open_a, share_a;
6768         uint32_t open_d, share_d;
6769         rfs4_deleg_state_t *dsp;
6770         sysid_t sysid;
6771         nfsstat4 status;
6772         caller_context_t ct;
6773         int fflags = 0;
6774         int recall = 0;
6775         int err;
6776         int first_open;
6777 
6778         /* get the file struct and hold a lock on it during initial open */
6779         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6780         if (fp == NULL) {
6781                 resp->status = NFS4ERR_RESOURCE;
6782                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6783                 return;
6784         }
6785 
6786         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6787         if (sp == NULL) {
6788                 resp->status = NFS4ERR_RESOURCE;
6789                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6790                 /* No need to keep any reference */
6791                 rw_exit(&fp->rf_file_rwlock);
6792                 rfs4_file_rele(fp);
6793                 return;
6794         }
6795 
6796         /* try to get the sysid before continuing */
6797         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6798                 resp->status = status;
6799                 rfs4_file_rele(fp);
6800                 /* Not a fully formed open; "close" it */
6801                 if (screate == TRUE)
6802                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6803                 rfs4_state_rele(sp);
6804                 return;
6805         }
6806 
6807         /* Calculate the fflags for this OPEN. */
6808         if (access & OPEN4_SHARE_ACCESS_READ)
6809                 fflags |= FREAD;
6810         if (access & OPEN4_SHARE_ACCESS_WRITE)
6811                 fflags |= FWRITE;
6812 
6813         rfs4_dbe_lock(sp->rs_dbe);
6814 
6815         /*
6816          * Calculate the new deny and access mode that this open is adding to
6817          * the file for this open owner;
6818          */
6819         open_d = (deny & ~sp->rs_open_deny);
6820         open_a = (access & ~sp->rs_open_access);
6821 
6822         /*
6823          * Calculate the new share access and share deny modes that this open
6824          * is adding to the file for this open owner;
6825          */
6826         share_a = (access & ~sp->rs_share_access);
6827         share_d = (deny & ~sp->rs_share_deny);
6828 
6829         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6830 
6831         /*
6832          * Check to see the client has already sent an open for this
6833          * open owner on this file with the same share/deny modes.
6834          * If so, we don't need to check for a conflict and we don't
6835          * need to add another shrlock.  If not, then we need to
6836          * check for conflicts in deny and access before checking for
6837          * conflicts in delegation.  We don't want to recall a
6838          * delegation based on an open that will eventually fail based
6839          * on shares modes.
6840          */
6841 
6842         if (share_a || share_d) {
6843                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6844                         rfs4_dbe_unlock(sp->rs_dbe);
6845                         resp->status = err;
6846 
6847                         rfs4_file_rele(fp);
6848                         /* Not a fully formed open; "close" it */
6849                         if (screate == TRUE)
6850                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6851                         rfs4_state_rele(sp);
6852                         return;
6853                 }
6854         }
6855 
6856         rfs4_dbe_lock(fp->rf_dbe);
6857 
6858         /*
6859          * Check to see if this file is delegated and if so, if a
6860          * recall needs to be done.
6861          */
6862         if (rfs4_check_recall(sp, access)) {
6863                 rfs4_dbe_unlock(fp->rf_dbe);
6864                 rfs4_dbe_unlock(sp->rs_dbe);
6865                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6866                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6867                 rfs4_dbe_lock(sp->rs_dbe);
6868 
6869                 /* if state closed while lock was dropped */
6870                 if (sp->rs_closed) {
6871                         if (share_a || share_d)
6872                                 (void) rfs4_unshare(sp);
6873                         rfs4_dbe_unlock(sp->rs_dbe);
6874                         rfs4_file_rele(fp);
6875                         /* Not a fully formed open; "close" it */
6876                         if (screate == TRUE)
6877                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6878                         rfs4_state_rele(sp);
6879                         resp->status = NFS4ERR_OLD_STATEID;
6880                         return;
6881                 }
6882 
6883                 rfs4_dbe_lock(fp->rf_dbe);
6884                 /* Let's see if the delegation was returned */
6885                 if (rfs4_check_recall(sp, access)) {
6886                         rfs4_dbe_unlock(fp->rf_dbe);
6887                         if (share_a || share_d)
6888                                 (void) rfs4_unshare(sp);
6889                         rfs4_dbe_unlock(sp->rs_dbe);
6890                         rfs4_file_rele(fp);
6891                         rfs4_update_lease(sp->rs_owner->ro_client);
6892 
6893                         /* Not a fully formed open; "close" it */
6894                         if (screate == TRUE)
6895                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6896                         rfs4_state_rele(sp);
6897                         resp->status = NFS4ERR_DELAY;
6898                         return;
6899                 }
6900         }
6901         /*
6902          * the share check passed and any delegation conflict has been
6903          * taken care of, now call vop_open.
6904          * if this is the first open then call vop_open with fflags.
6905          * if not, call vn_open_upgrade with just the upgrade flags.
6906          *
6907          * if the file has been opened already, it will have the current
6908          * access mode in the state struct.  if it has no share access, then
6909          * this is a new open.
6910          *
6911          * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6912          * call VOP_OPEN(), just do the open upgrade.
6913          */
6914         if (first_open && !deleg_cur) {
6915                 ct.cc_sysid = sysid;
6916                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6917                 ct.cc_caller_id = nfs4_srv_caller_id;
6918                 ct.cc_flags = CC_DONTBLOCK;
6919                 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6920                 if (err) {
6921                         rfs4_dbe_unlock(fp->rf_dbe);
6922                         if (share_a || share_d)
6923                                 (void) rfs4_unshare(sp);
6924                         rfs4_dbe_unlock(sp->rs_dbe);
6925                         rfs4_file_rele(fp);
6926 
6927                         /* Not a fully formed open; "close" it */
6928                         if (screate == TRUE)
6929                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6930                         rfs4_state_rele(sp);
6931                         /* check if a monitor detected a delegation conflict */
6932                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6933                                 resp->status = NFS4ERR_DELAY;
6934                         else
6935                                 resp->status = NFS4ERR_SERVERFAULT;
6936                         return;
6937                 }
6938         } else { /* open upgrade */
6939                 /*
6940                  * calculate the fflags for the new mode that is being added
6941                  * by this upgrade.
6942                  */
6943                 fflags = 0;
6944                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6945                         fflags |= FREAD;
6946                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6947                         fflags |= FWRITE;
6948                 vn_open_upgrade(cs->vp, fflags);
6949         }
6950         sp->rs_open_access |= access;
6951         sp->rs_open_deny |= deny;
6952 
6953         if (open_d & OPEN4_SHARE_DENY_READ)
6954                 fp->rf_deny_read++;
6955         if (open_d & OPEN4_SHARE_DENY_WRITE)
6956                 fp->rf_deny_write++;
6957         fp->rf_share_deny |= deny;
6958 
6959         if (open_a & OPEN4_SHARE_ACCESS_READ)
6960                 fp->rf_access_read++;
6961         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6962                 fp->rf_access_write++;
6963         fp->rf_share_access |= access;
6964 
6965         /*
6966          * Check for delegation here. if the deleg argument is not
6967          * DELEG_ANY, then this is a reclaim from a client and
6968          * we must honor the delegation requested. If necessary we can
6969          * set the recall flag.
6970          */
6971 
6972         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6973 
6974         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6975 
6976         next_stateid(&sp->rs_stateid);
6977 
6978         resp->stateid = sp->rs_stateid.stateid;
6979 
6980         rfs4_dbe_unlock(fp->rf_dbe);
6981         rfs4_dbe_unlock(sp->rs_dbe);
6982 
6983         if (dsp) {
6984                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6985                 rfs4_deleg_state_rele(dsp);
6986         }
6987 
6988         rfs4_file_rele(fp);
6989         rfs4_state_rele(sp);
6990 
6991         resp->status = NFS4_OK;
6992 }
6993 
6994 /*ARGSUSED*/
6995 static void
6996 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6997     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6998 {
6999         change_info4 *cinfo = &resp->cinfo;
7000         bitmap4 *attrset = &resp->attrset;
7001 
7002         if (args->opentype == OPEN4_NOCREATE)
7003                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
7004                     req, cs, args->share_access, cinfo);
7005         else {
7006                 /* inhibit delegation grants during exclusive create */
7007 
7008                 if (args->mode == EXCLUSIVE4)
7009                         rfs4_disable_delegation();
7010 
7011                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7012                     oo->ro_client->rc_clientid);
7013         }
7014 
7015         if (resp->status == NFS4_OK) {
7016 
7017                 /* cs->vp cs->fh now reference the desired file */
7018 
7019                 rfs4_do_open(cs, req, oo,
7020                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7021                     args->share_access, args->share_deny, resp, 0);
7022 
7023                 /*
7024                  * If rfs4_createfile set attrset, we must
7025                  * clear this attrset before the response is copied.
7026                  */
7027                 if (resp->status != NFS4_OK && resp->attrset) {
7028                         resp->attrset = 0;
7029                 }
7030         }
7031         else
7032                 *cs->statusp = resp->status;
7033 
7034         if (args->mode == EXCLUSIVE4)
7035                 rfs4_enable_delegation();
7036 }
7037 
7038 /*ARGSUSED*/
7039 static void
7040 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7041     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7042 {
7043         change_info4 *cinfo = &resp->cinfo;
7044         vattr_t va;
7045         vtype_t v_type = cs->vp->v_type;
7046         int error = 0;
7047 
7048         /* Verify that we have a regular file */
7049         if (v_type != VREG) {
7050                 if (v_type == VDIR)
7051                         resp->status = NFS4ERR_ISDIR;
7052                 else if (v_type == VLNK)
7053                         resp->status = NFS4ERR_SYMLINK;
7054                 else
7055                         resp->status = NFS4ERR_INVAL;
7056                 return;
7057         }
7058 
7059         va.va_mask = AT_MODE|AT_UID;
7060         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7061         if (error) {
7062                 resp->status = puterrno4(error);
7063                 return;
7064         }
7065 
7066         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7067 
7068         /*
7069          * Check if we have access to the file, Note the the file
7070          * could have originally been open UNCHECKED or GUARDED
7071          * with mode bits that will now fail, but there is nothing
7072          * we can really do about that except in the case that the
7073          * owner of the file is the one requesting the open.
7074          */
7075         if (crgetuid(cs->cr) != va.va_uid) {
7076                 resp->status = check_open_access(args->share_access, cs, req);
7077                 if (resp->status != NFS4_OK) {
7078                         return;
7079                 }
7080         }
7081 
7082         /*
7083          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7084          */
7085         cinfo->before = 0;
7086         cinfo->after = 0;
7087         cinfo->atomic = FALSE;
7088 
7089         rfs4_do_open(cs, req, oo,
7090             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7091             args->share_access, args->share_deny, resp, 0);
7092 }
7093 
7094 static void
7095 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7096     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7097 {
7098         int error;
7099         nfsstat4 status;
7100         stateid4 stateid =
7101             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7102         rfs4_deleg_state_t *dsp;
7103 
7104         /*
7105          * Find the state info from the stateid and confirm that the
7106          * file is delegated.  If the state openowner is the same as
7107          * the supplied openowner we're done. If not, get the file
7108          * info from the found state info. Use that file info to
7109          * create the state for this lock owner. Note solaris doen't
7110          * really need the pathname to find the file. We may want to
7111          * lookup the pathname and make sure that the vp exist and
7112          * matches the vp in the file structure. However it is
7113          * possible that the pathname nolonger exists (local process
7114          * unlinks the file), so this may not be that useful.
7115          */
7116 
7117         status = rfs4_get_deleg_state(&stateid, &dsp);
7118         if (status != NFS4_OK) {
7119                 resp->status = status;
7120                 return;
7121         }
7122 
7123         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7124 
7125         /*
7126          * New lock owner, create state. Since this was probably called
7127          * in response to a CB_RECALL we set deleg to DELEG_NONE
7128          */
7129 
7130         ASSERT(cs->vp != NULL);
7131         VN_RELE(cs->vp);
7132         VN_HOLD(dsp->rds_finfo->rf_vp);
7133         cs->vp = dsp->rds_finfo->rf_vp;
7134 
7135         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7136                 rfs4_deleg_state_rele(dsp);
7137                 *cs->statusp = resp->status = puterrno4(error);
7138                 return;
7139         }
7140 
7141         /* Mark progress for delegation returns */
7142         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7143         rfs4_deleg_state_rele(dsp);
7144         rfs4_do_open(cs, req, oo, DELEG_NONE,
7145             args->share_access, args->share_deny, resp, 1);
7146 }
7147 
7148 /*ARGSUSED*/
7149 static void
7150 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7151     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7152 {
7153         /*
7154          * Lookup the pathname, it must already exist since this file
7155          * was delegated.
7156          *
7157          * Find the file and state info for this vp and open owner pair.
7158          *      check that they are in fact delegated.
7159          *      check that the state access and deny modes are the same.
7160          *
7161          * Return the delgation possibly seting the recall flag.
7162          */
7163         rfs4_file_t *fp;
7164         rfs4_state_t *sp;
7165         bool_t create = FALSE;
7166         bool_t dcreate = FALSE;
7167         rfs4_deleg_state_t *dsp;
7168         nfsace4 *ace;
7169 
7170         /* Note we ignore oflags */
7171         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7172             req, cs, args->share_access, &resp->cinfo);
7173 
7174         if (resp->status != NFS4_OK) {
7175                 return;
7176         }
7177 
7178         /* get the file struct and hold a lock on it during initial open */
7179         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7180         if (fp == NULL) {
7181                 resp->status = NFS4ERR_RESOURCE;
7182                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7183                 return;
7184         }
7185 
7186         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7187         if (sp == NULL) {
7188                 resp->status = NFS4ERR_SERVERFAULT;
7189                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7190                 rw_exit(&fp->rf_file_rwlock);
7191                 rfs4_file_rele(fp);
7192                 return;
7193         }
7194 
7195         rfs4_dbe_lock(sp->rs_dbe);
7196         rfs4_dbe_lock(fp->rf_dbe);
7197         if (args->share_access != sp->rs_share_access ||
7198             args->share_deny != sp->rs_share_deny ||
7199             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7200                 NFS4_DEBUG(rfs4_debug,
7201                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7202                 rfs4_dbe_unlock(fp->rf_dbe);
7203                 rfs4_dbe_unlock(sp->rs_dbe);
7204                 rfs4_file_rele(fp);
7205                 rfs4_state_rele(sp);
7206                 resp->status = NFS4ERR_SERVERFAULT;
7207                 return;
7208         }
7209         rfs4_dbe_unlock(fp->rf_dbe);
7210         rfs4_dbe_unlock(sp->rs_dbe);
7211 
7212         dsp = rfs4_finddeleg(sp, &dcreate);
7213         if (dsp == NULL) {
7214                 rfs4_state_rele(sp);
7215                 rfs4_file_rele(fp);
7216                 resp->status = NFS4ERR_SERVERFAULT;
7217                 return;
7218         }
7219 
7220         next_stateid(&sp->rs_stateid);
7221 
7222         resp->stateid = sp->rs_stateid.stateid;
7223 
7224         resp->delegation.delegation_type = dsp->rds_dtype;
7225 
7226         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7227                 open_read_delegation4 *rv =
7228                     &resp->delegation.open_delegation4_u.read;
7229 
7230                 rv->stateid = dsp->rds_delegid.stateid;
7231                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7232                 ace = &rv->permissions;
7233         } else {
7234                 open_write_delegation4 *rv =
7235                     &resp->delegation.open_delegation4_u.write;
7236 
7237                 rv->stateid = dsp->rds_delegid.stateid;
7238                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7239                 ace = &rv->permissions;
7240                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7241                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7242         }
7243 
7244         /* XXX For now */
7245         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7246         ace->flag = 0;
7247         ace->access_mask = 0;
7248         ace->who.utf8string_len = 0;
7249         ace->who.utf8string_val = 0;
7250 
7251         rfs4_deleg_state_rele(dsp);
7252         rfs4_state_rele(sp);
7253         rfs4_file_rele(fp);
7254 }
7255 
/*
 * Result of a sequence id check; values are 0, 1 and 2 in this order.
 */
typedef enum {
        NFS4_CHKSEQ_OKAY,       /* request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY,     /* same seqid and operation as last reply */
        NFS4_CHKSEQ_BAD         /* any other seqid, or replay of another op */
} rfs4_chkseq_t;
7261 
7262 /*
7263  * Generic function for sequence number checks.
7264  */
7265 static rfs4_chkseq_t
7266 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7267     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7268 {
7269         /* Same sequence ids and matching operations? */
7270         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7271                 if (copyres == TRUE) {
7272                         rfs4_free_reply(resop);
7273                         rfs4_copy_reply(resop, lastop);
7274                 }
7275                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7276                     "Replayed SEQID %d\n", seqid));
7277                 return (NFS4_CHKSEQ_REPLAY);
7278         }
7279 
7280         /* If the incoming sequence is not the next expected then it is bad */
7281         if (rqst_seq != seqid + 1) {
7282                 if (rqst_seq == seqid) {
7283                         NFS4_DEBUG(rfs4_debug,
7284                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7285                             "but last op was %d current op is %d\n",
7286                             lastop->resop, resop->resop));
7287                         return (NFS4_CHKSEQ_BAD);
7288                 }
7289                 NFS4_DEBUG(rfs4_debug,
7290                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7291                     rqst_seq, seqid));
7292                 return (NFS4_CHKSEQ_BAD);
7293         }
7294 
7295         /* Everything okay -- next expected */
7296         return (NFS4_CHKSEQ_OKAY);
7297 }
7298 
7299 
7300 static rfs4_chkseq_t
7301 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7302 {
7303         rfs4_chkseq_t rc;
7304 
7305         rfs4_dbe_lock(op->ro_dbe);
7306         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7307             TRUE);
7308         rfs4_dbe_unlock(op->ro_dbe);
7309 
7310         if (rc == NFS4_CHKSEQ_OKAY)
7311                 rfs4_update_lease(op->ro_client);
7312 
7313         return (rc);
7314 }
7315 
7316 static rfs4_chkseq_t
7317 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7318 {
7319         rfs4_chkseq_t rc;
7320 
7321         rfs4_dbe_lock(op->ro_dbe);
7322         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7323             olo_seqid, resop, FALSE);
7324         rfs4_dbe_unlock(op->ro_dbe);
7325 
7326         return (rc);
7327 }
7328 
7329 static rfs4_chkseq_t
7330 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7331 {
7332         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7333 
7334         rfs4_dbe_lock(lsp->rls_dbe);
7335         if (!lsp->rls_skip_seqid_check)
7336                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7337                     resop, TRUE);
7338         rfs4_dbe_unlock(lsp->rls_dbe);
7339 
7340         return (rc);
7341 }
7342 
/*
 * OPEN operation handler.
 *
 * Outline of the flow:
 *   1. Require a current filehandle (cs->vp), else NFS4ERR_NOFILEHANDLE.
 *   2. Look up the client named by owner->clientid and reject an expired
 *      lease with NFS4ERR_EXPIRED.
 *   3. Find or create the open owner, enter its ro_sw, and, for an owner
 *      that already existed, validate args->seqid (bad seqid, replay, or
 *      an owner that is still waiting for confirmation).
 *   4. Apply the grace-period rules for the claim type, then check for a
 *      mount point and validate share_access / share_deny.
 *   5. Dispatch on the claim type to the matching rfs4_do_open*() routine.
 *   6. At "out", release the client and perform the lease, saved-reply and
 *      sequence-id bookkeeping that depends on the final status.
 *
 * The result is reported through resp->status and *cs->statusp.  There is
 * no bare return: every path reaches "end", so op__open__done fires for
 * each op__open__start.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
        OPEN4args *args = &argop->nfs_argop4_u.opopen;
        OPEN4res *resp = &resop->nfs_resop4_u.opopen;
        open_owner4 *owner = &args->owner;
        open_claim_type4 claim = args->claim;
        rfs4_client_t *cp;
        rfs4_openowner_t *oo;
        bool_t create;
        bool_t replay = FALSE;
        int can_reclaim;

        DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
            OPEN4args *, args);

        if (cs->vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                goto end;
        }

        /*
         * Need to check clientid and lease expiration first based on
         * error ordering and incrementing sequence id.
         */
        cp = rfs4_findclient_by_id(owner->clientid, FALSE);
        if (cp == NULL) {
                *cs->statusp = resp->status =
                    rfs4_check_clientid(&owner->clientid, 0);
                goto end;
        }

        if (rfs4_lease_expired(cp)) {
                /*
                 * NOTE(review): there is no rfs4_client_rele() on this
                 * path; presumably rfs4_client_close() drops the hold
                 * taken by the lookup above -- confirm.
                 */
                rfs4_client_close(cp);
                *cs->statusp = resp->status = NFS4ERR_EXPIRED;
                goto end;
        }
        /* Snapshot taken once; it is not re-read after a "retry". */
        can_reclaim = cp->rc_can_reclaim;

        /*
         * Find the open_owner for use from this point forward.  Take
         * care in updating the sequence id based on the type of error
         * being returned.
         */
retry:
        /*
         * `create' is in/out: it is set before every lookup and the code
         * below treats a FALSE value afterwards as "the open_owner already
         * existed".
         */
        create = TRUE;
        oo = rfs4_findopenowner(owner, &create, args->seqid);
        if (oo == NULL) {
                *cs->statusp = resp->status = NFS4ERR_RESOURCE;
                rfs4_client_rele(cp);
                goto end;
        }

        /* Hold off access to the sequence space while the open is done */
        rfs4_sw_enter(&oo->ro_sw);

        /*
         * If the open_owner existed before at the server, then check
         * the sequence id.
         */
        if (!create && !oo->ro_postpone_confirm) {
                switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
                case NFS4_CHKSEQ_BAD:
                        /*
                         * An unconfirmed owner presented with a larger
                         * seqid has its opens freed and the lookup is
                         * redone from "retry".
                         */
                        if ((args->seqid > oo->ro_open_seqid) &&
                            oo->ro_need_confirm) {
                                rfs4_free_opens(oo, TRUE, FALSE);
                                rfs4_sw_exit(&oo->ro_sw);
                                rfs4_openowner_rele(oo);
                                goto retry;
                        }
                        /* *cs->statusp is filled in at "finish". */
                        resp->status = NFS4ERR_BAD_SEQID;
                        goto out;
                case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
                        /*
                         * NOTE(review): resp is presumably already loaded
                         * with the saved reply by the seqid check; the
                         * code at "out" relies on resp->status -- confirm.
                         */
                        replay = TRUE;
                        goto out;
                default:
                        break;
                }

                /*
                 * Sequence was ok and open owner exists
                 * check to see if we have yet to see an
                 * open_confirm.
                 */
                if (oo->ro_need_confirm) {
                        rfs4_free_opens(oo, TRUE, FALSE);
                        rfs4_sw_exit(&oo->ro_sw);
                        rfs4_openowner_rele(oo);
                        goto retry;
                }
        }
        /* Grace only applies to regular-type OPENs */
        if (rfs4_clnt_in_grace(cp) &&
            (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
                *cs->statusp = resp->status = NFS4ERR_GRACE;
                goto out;
        }

        /*
         * If previous state at the server existed then can_reclaim
         * will be set. If not reply NFS4ERR_NO_GRACE to the
         * client.
         */
        if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }


        /*
         * Reject the open if the client has missed the grace period
         */
        if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }

        /* Couple of up-front bookkeeping items */
        if (oo->ro_need_confirm) {
                /*
                 * If this is a reclaim OPEN then we should not ask
                 * for a confirmation of the open_owner per the
                 * protocol specification.
                 */
                if (claim == CLAIM_PREVIOUS)
                        oo->ro_need_confirm = FALSE;
                else
                        resp->rflags |= OPEN4_RESULT_CONFIRM;
        }
        resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

        /*
         * If there is an unshared filesystem mounted on this vnode,
         * do not allow to open/create in this directory.
         */
        if (vn_ismntpt(cs->vp)) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        /*
         * access must READ, WRITE, or BOTH.  No access is invalid.
         * deny can be READ, WRITE, BOTH, or NONE.
         * bits not defined for access/deny are invalid.
         */
        if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
                *cs->statusp = resp->status = NFS4ERR_INVAL;
                goto out;
        }


        /*
         * make sure attrset is zero before response is built.
         */
        resp->attrset = 0;

        /* Each claim handler receives resp; its status is examined at "out". */
        switch (claim) {
        case CLAIM_NULL:
                rfs4_do_opennull(cs, req, args, oo, resp);
                break;
        case CLAIM_PREVIOUS:
                rfs4_do_openprev(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_CUR:
                rfs4_do_opendelcur(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_PREV:
                rfs4_do_opendelprev(cs, req, args, oo, resp);
                break;
        default:
                resp->status = NFS4ERR_INVAL;
                break;
        }

        /*
         * Every path that arrives here still holds the client (cp) and the
         * open owner (oo) and has oo->ro_sw entered.
         */
out:
        rfs4_client_rele(cp);

        /* Catch sequence id handling here to make it a little easier */
        switch (resp->status) {
        case NFS4ERR_BADXDR:
        case NFS4ERR_BAD_SEQID:
        case NFS4ERR_BAD_STATEID:
        case NFS4ERR_NOFILEHANDLE:
        case NFS4ERR_RESOURCE:
        case NFS4ERR_STALE_CLIENTID:
        case NFS4ERR_STALE_STATEID:
                /*
                 * The protocol states that if any of these errors are
                 * being returned, the sequence id should not be
                 * incremented.  Any other return requires an
                 * increment.
                 */
                break;
        default:
                /* Always update the lease in this case */
                rfs4_update_lease(oo->ro_client);

                /* Regular response - copy the result */
                if (!replay)
                        rfs4_update_open_resp(oo, resop, &cs->fh);

                /*
                 * REPLAY case: Only if the previous response was OK
                 * do we copy the filehandle.  If not OK, no
                 * filehandle to copy.
                 */
                if (replay == TRUE &&
                    resp->status == NFS4_OK &&
                    oo->ro_reply_fh.nfs_fh4_val) {
                        /*
                         * If this is a replay, we must restore the
                         * current filehandle/vp to that of what was
                         * returned originally.  Try our best to do
                         * it.
                         */
                        nfs_fh4_fmt_t *fh_fmtp =
                            (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

                        cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
                            (fid_t *)&fh_fmtp->fh4_xlen, NULL);

                        if (cs->exi == NULL) {
                                resp->status = NFS4ERR_STALE;
                                goto finish;
                        }

                        /* Drop the old current vnode before replacing it. */
                        VN_RELE(cs->vp);

                        cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
                            &resp->status);

                        /* On failure resp->status was set by nfs4_fhtovp(). */
                        if (cs->vp == NULL)
                                goto finish;

                        nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
                }

                /*
                 * If this was a replay, no need to update the
                 * sequence id. If the open_owner was not created on
                 * this pass, then update.  The first use of an
                 * open_owner will not bump the sequence id.
                 */
                if (replay == FALSE && !create)
                        rfs4_update_open_sequence(oo);
                /*
                 * If the client is receiving an error and the
                 * open_owner needs to be confirmed, there is no way
                 * to notify the client of this fact ignoring the fact
                 * that the server has no method of returning a
                 * stateid to confirm.  Therefore, the server needs to
                 * mark this open_owner in a way as to avoid the
                 * sequence id checking the next time the client uses
                 * this open_owner.
                 */
                if (resp->status != NFS4_OK && oo->ro_need_confirm)
                        oo->ro_postpone_confirm = TRUE;
                /*
                 * If OK response then clear the postpone flag and
                 * reset the sequence id to keep in sync with the
                 * client.
                 */
                if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
                        oo->ro_postpone_confirm = FALSE;
                        oo->ro_open_seqid = args->seqid;
                }
                break;
        }

        /*
         * Publish the final status (several paths above set only
         * resp->status), then leave ro_sw and drop the open owner hold.
         */
finish:
        *cs->statusp = resp->status;

        rfs4_sw_exit(&oo->ro_sw);
        rfs4_openowner_rele(oo);

        /*
         * The direct jumps to this label all occur before an open owner is
         * held, so nothing remains to be released here.
         */
end:
        DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
            OPEN4res *, resp);
}
7625 
7626 /*ARGSUSED*/
7627 void
7628 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7629     struct svc_req *req, struct compound_state *cs)
7630 {
7631         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7632         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7633         rfs4_state_t *sp;
7634         nfsstat4 status;
7635 
7636         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7637             OPEN_CONFIRM4args *, args);
7638 
7639         if (cs->vp == NULL) {
7640                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7641                 goto out;
7642         }
7643 
7644         if (cs->vp->v_type != VREG) {
7645                 *cs->statusp = resp->status =
7646                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7647                 return;
7648         }
7649 
7650         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7651         if (status != NFS4_OK) {
7652                 *cs->statusp = resp->status = status;
7653                 goto out;
7654         }
7655 
7656         /* Ensure specified filehandle matches */
7657         if (cs->vp != sp->rs_finfo->rf_vp) {
7658                 rfs4_state_rele(sp);
7659                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7660                 goto out;
7661         }
7662 
7663         /* hold off other access to open_owner while we tinker */
7664         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7665 
7666         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7667         case NFS4_CHECK_STATEID_OKAY:
7668                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7669                     resop) != 0) {
7670                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7671                         break;
7672                 }
7673                 /*
7674                  * If it is the appropriate stateid and determined to
7675                  * be "OKAY" then this means that the stateid does not
7676                  * need to be confirmed and the client is in error for
7677                  * sending an OPEN_CONFIRM.
7678                  */
7679                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7680                 break;
7681         case NFS4_CHECK_STATEID_OLD:
7682                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7683                 break;
7684         case NFS4_CHECK_STATEID_BAD:
7685                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7686                 break;
7687         case NFS4_CHECK_STATEID_EXPIRED:
7688                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7689                 break;
7690         case NFS4_CHECK_STATEID_CLOSED:
7691                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7692                 break;
7693         case NFS4_CHECK_STATEID_REPLAY:
7694                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7695                     resop)) {
7696                 case NFS4_CHKSEQ_OKAY:
7697                         /*
7698                          * This is replayed stateid; if seqid matches
7699                          * next expected, then client is using wrong seqid.
7700                          */
7701                         /* fall through */
7702                 case NFS4_CHKSEQ_BAD:
7703                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7704                         break;
7705                 case NFS4_CHKSEQ_REPLAY:
7706                         /*
7707                          * Note this case is the duplicate case so
7708                          * resp->status is already set.
7709                          */
7710                         *cs->statusp = resp->status;
7711                         rfs4_update_lease(sp->rs_owner->ro_client);
7712                         break;
7713                 }
7714                 break;
7715         case NFS4_CHECK_STATEID_UNCONFIRMED:
7716                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7717                     resop) != NFS4_CHKSEQ_OKAY) {
7718                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7719                         break;
7720                 }
7721                 *cs->statusp = resp->status = NFS4_OK;
7722 
7723                 next_stateid(&sp->rs_stateid);
7724                 resp->open_stateid = sp->rs_stateid.stateid;
7725                 sp->rs_owner->ro_need_confirm = FALSE;
7726                 rfs4_update_lease(sp->rs_owner->ro_client);
7727                 rfs4_update_open_sequence(sp->rs_owner);
7728                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7729                 break;
7730         default:
7731                 ASSERT(FALSE);
7732                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7733                 break;
7734         }
7735         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7736         rfs4_state_rele(sp);
7737 
7738 out:
7739         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7740             OPEN_CONFIRM4res *, resp);
7741 }
7742 
7743 /*ARGSUSED*/
7744 void
7745 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7746     struct svc_req *req, struct compound_state *cs)
7747 {
7748         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7749         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7750         uint32_t access = args->share_access;
7751         uint32_t deny = args->share_deny;
7752         nfsstat4 status;
7753         rfs4_state_t *sp;
7754         rfs4_file_t *fp;
7755         int fflags = 0;
7756 
7757         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7758             OPEN_DOWNGRADE4args *, args);
7759 
7760         if (cs->vp == NULL) {
7761                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7762                 goto out;
7763         }
7764 
7765         if (cs->vp->v_type != VREG) {
7766                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7767                 return;
7768         }
7769 
7770         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7771         if (status != NFS4_OK) {
7772                 *cs->statusp = resp->status = status;
7773                 goto out;
7774         }
7775 
7776         /* Ensure specified filehandle matches */
7777         if (cs->vp != sp->rs_finfo->rf_vp) {
7778                 rfs4_state_rele(sp);
7779                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7780                 goto out;
7781         }
7782 
7783         /* hold off other access to open_owner while we tinker */
7784         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7785 
7786         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7787         case NFS4_CHECK_STATEID_OKAY:
7788                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7789                     resop) != NFS4_CHKSEQ_OKAY) {
7790                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7791                         goto end;
7792                 }
7793                 break;
7794         case NFS4_CHECK_STATEID_OLD:
7795                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7796                 goto end;
7797         case NFS4_CHECK_STATEID_BAD:
7798                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7799                 goto end;
7800         case NFS4_CHECK_STATEID_EXPIRED:
7801                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7802                 goto end;
7803         case NFS4_CHECK_STATEID_CLOSED:
7804                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7805                 goto end;
7806         case NFS4_CHECK_STATEID_UNCONFIRMED:
7807                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7808                 goto end;
7809         case NFS4_CHECK_STATEID_REPLAY:
7810                 /* Check the sequence id for the open owner */
7811                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7812                     resop)) {
7813                 case NFS4_CHKSEQ_OKAY:
7814                         /*
7815                          * This is replayed stateid; if seqid matches
7816                          * next expected, then client is using wrong seqid.
7817                          */
7818                         /* fall through */
7819                 case NFS4_CHKSEQ_BAD:
7820                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7821                         goto end;
7822                 case NFS4_CHKSEQ_REPLAY:
7823                         /*
7824                          * Note this case is the duplicate case so
7825                          * resp->status is already set.
7826                          */
7827                         *cs->statusp = resp->status;
7828                         rfs4_update_lease(sp->rs_owner->ro_client);
7829                         goto end;
7830                 }
7831                 break;
7832         default:
7833                 ASSERT(FALSE);
7834                 break;
7835         }
7836 
7837         rfs4_dbe_lock(sp->rs_dbe);
7838         /*
7839          * Check that the new access modes and deny modes are valid.
7840          * Check that no invalid bits are set.
7841          */
7842         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7843             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7844                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7845                 rfs4_update_open_sequence(sp->rs_owner);
7846                 rfs4_dbe_unlock(sp->rs_dbe);
7847                 goto end;
7848         }
7849 
7850         /*
7851          * The new modes must be a subset of the current modes and
7852          * the access must specify at least one mode. To test that
7853          * the new mode is a subset of the current modes we bitwise
7854          * AND them together and check that the result equals the new
7855          * mode. For example:
7856          * New mode, access == R and current mode, sp->rs_open_access  == RW
7857          * access & sp->rs_open_access == R == access, so the new access mode
7858          * is valid. Consider access == RW, sp->rs_open_access = R
7859          * access & sp->rs_open_access == R != access, so the new access mode
7860          * is invalid.
7861          */
7862         if ((access & sp->rs_open_access) != access ||
7863             (deny & sp->rs_open_deny) != deny ||
7864             (access &
7865             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7866                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7867                 rfs4_update_open_sequence(sp->rs_owner);
7868                 rfs4_dbe_unlock(sp->rs_dbe);
7869                 goto end;
7870         }
7871 
7872         /*
7873          * Release any share locks associated with this stateID.
7874          * Strictly speaking, this violates the spec because the
7875          * spec effectively requires that open downgrade be atomic.
7876          * At present, fs_shrlock does not have this capability.
7877          */
7878         (void) rfs4_unshare(sp);
7879 
7880         status = rfs4_share(sp, access, deny);
7881         if (status != NFS4_OK) {
7882                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7883                 rfs4_update_open_sequence(sp->rs_owner);
7884                 rfs4_dbe_unlock(sp->rs_dbe);
7885                 goto end;
7886         }
7887 
7888         fp = sp->rs_finfo;
7889         rfs4_dbe_lock(fp->rf_dbe);
7890 
7891         /*
7892          * If the current mode has deny read and the new mode
7893          * does not, decrement the number of deny read mode bits
7894          * and if it goes to zero turn off the deny read bit
7895          * on the file.
7896          */
7897         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7898             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7899                 fp->rf_deny_read--;
7900                 if (fp->rf_deny_read == 0)
7901                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7902         }
7903 
7904         /*
7905          * If the current mode has deny write and the new mode
7906          * does not, decrement the number of deny write mode bits
7907          * and if it goes to zero turn off the deny write bit
7908          * on the file.
7909          */
7910         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7911             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7912                 fp->rf_deny_write--;
7913                 if (fp->rf_deny_write == 0)
7914                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7915         }
7916 
7917         /*
7918          * If the current mode has access read and the new mode
7919          * does not, decrement the number of access read mode bits
7920          * and if it goes to zero turn off the access read bit
7921          * on the file.  set fflags to FREAD for the call to
7922          * vn_open_downgrade().
7923          */
7924         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7925             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7926                 fp->rf_access_read--;
7927                 if (fp->rf_access_read == 0)
7928                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7929                 fflags |= FREAD;
7930         }
7931 
7932         /*
7933          * If the current mode has access write and the new mode
7934          * does not, decrement the number of access write mode bits
7935          * and if it goes to zero turn off the access write bit
7936          * on the file.  set fflags to FWRITE for the call to
7937          * vn_open_downgrade().
7938          */
7939         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7940             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7941                 fp->rf_access_write--;
7942                 if (fp->rf_access_write == 0)
7943                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7944                 fflags |= FWRITE;
7945         }
7946 
7947         /* Check that the file is still accessible */
7948         ASSERT(fp->rf_share_access);
7949 
7950         rfs4_dbe_unlock(fp->rf_dbe);
7951 
7952         /* now set the new open access and deny modes */
7953         sp->rs_open_access = access;
7954         sp->rs_open_deny = deny;
7955 
7956         /*
7957          * we successfully downgraded the share lock, now we need to downgrade
7958          * the open. it is possible that the downgrade was only for a deny
7959          * mode and we have nothing else to do.
7960          */
7961         if ((fflags & (FREAD|FWRITE)) != 0)
7962                 vn_open_downgrade(cs->vp, fflags);
7963 
7964         /* Update the stateid */
7965         next_stateid(&sp->rs_stateid);
7966         resp->open_stateid = sp->rs_stateid.stateid;
7967 
7968         rfs4_dbe_unlock(sp->rs_dbe);
7969 
7970         *cs->statusp = resp->status = NFS4_OK;
7971         /* Update the lease */
7972         rfs4_update_lease(sp->rs_owner->ro_client);
7973         /* And the sequence */
7974         rfs4_update_open_sequence(sp->rs_owner);
7975         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7976 
7977 end:
7978         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7979         rfs4_state_rele(sp);
7980 out:
7981         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7982             OPEN_DOWNGRADE4res *, resp);
7983 }
7984 
/*
 * Locate the NUL-terminated string `s2' inside the first `n' bytes of the
 * buffer `s1'.  The buffer is treated as raw bytes and need not be
 * NUL-terminated.
 *
 * Returns a pointer to the first match within `s1', or NULL when the
 * string does not occur entirely within those `n' bytes.  An empty `s2'
 * matches at `s1' itself.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t needle_len = strlen(s2);
        char *cursor = (char *)s1;
        size_t remaining;

        /* Stop once fewer than needle_len bytes are left to compare. */
        for (remaining = n; remaining >= needle_len; remaining--, cursor++) {
                if (bcmp(cursor, s2, needle_len) == 0)
                        return (cursor);
        }

        return (NULL);
}
8000 
8001 /*
8002  * The logic behind this function is detailed in the NFSv4 RFC in the
8003  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
8004  * that section for explicit guidance to server behavior for
8005  * SETCLIENTID.
8006  */
8007 void
8008 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8009     struct svc_req *req, struct compound_state *cs)
8010 {
8011         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8012         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8013         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8014         rfs4_clntip_t *ci;
8015         bool_t create;
8016         char *addr, *netid;
8017         int len;
8018 
8019         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8020             SETCLIENTID4args *, args);
8021 retry:
8022         newcp = cp_confirmed = cp_unconfirmed = NULL;
8023 
8024         /*
8025          * Save the caller's IP address
8026          */
8027         args->client.cl_addr =
8028             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8029 
8030         /*
8031          * Record if it is a Solaris client that cannot handle referrals.
8032          */
8033         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8034             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8035                 /* Add a "yes, it's downrev" record */
8036                 create = TRUE;
8037                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8038                 ASSERT(ci != NULL);
8039                 rfs4_dbe_rele(ci->ri_dbe);
8040         } else {
8041                 /* Remove any previous record */
8042                 rfs4_invalidate_clntip(args->client.cl_addr);
8043         }
8044 
8045         /*
8046          * In search of an EXISTING client matching the incoming
8047          * request to establish a new client identifier at the server
8048          */
8049         create = TRUE;
8050         cp = rfs4_findclient(&args->client, &create, NULL);
8051 
8052         /* Should never happen */
8053         ASSERT(cp != NULL);
8054 
8055         if (cp == NULL) {
8056                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8057                 goto out;
8058         }
8059 
8060         /*
8061          * Easiest case. Client identifier is newly created and is
8062          * unconfirmed.  Also note that for this case, no other
8063          * entries exist for the client identifier.  Nothing else to
8064          * check.  Just setup the response and respond.
8065          */
8066         if (create) {
8067                 *cs->statusp = res->status = NFS4_OK;
8068                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8069                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8070                     cp->rc_confirm_verf;
8071                 /* Setup callback information; CB_NULL confirmation later */
8072                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8073 
8074                 rfs4_client_rele(cp);
8075                 goto out;
8076         }
8077 
8078         /*
8079          * An existing, confirmed client may exist but it may not have
8080          * been active for at least one lease period.  If so, then
8081          * "close" the client and create a new client identifier
8082          */
8083         if (rfs4_lease_expired(cp)) {
8084                 rfs4_client_close(cp);
8085                 goto retry;
8086         }
8087 
8088         if (cp->rc_need_confirm == TRUE)
8089                 cp_unconfirmed = cp;
8090         else
8091                 cp_confirmed = cp;
8092 
8093         cp = NULL;
8094 
8095         /*
8096          * We have a confirmed client, now check for an
8097          * unconfimred entry
8098          */
8099         if (cp_confirmed) {
8100                 /* If creds don't match then client identifier is inuse */
8101                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8102                         rfs4_cbinfo_t *cbp;
8103                         /*
8104                          * Some one else has established this client
8105                          * id. Try and say * who they are. We will use
8106                          * the call back address supplied by * the
8107                          * first client.
8108                          */
8109                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8110 
8111                         addr = netid = NULL;
8112 
8113                         cbp = &cp_confirmed->rc_cbinfo;
8114                         if (cbp->cb_callback.cb_location.r_addr &&
8115                             cbp->cb_callback.cb_location.r_netid) {
8116                                 cb_client4 *cbcp = &cbp->cb_callback;
8117 
8118                                 len = strlen(cbcp->cb_location.r_addr)+1;
8119                                 addr = kmem_alloc(len, KM_SLEEP);
8120                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8121                                 len = strlen(cbcp->cb_location.r_netid)+1;
8122                                 netid = kmem_alloc(len, KM_SLEEP);
8123                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8124                         }
8125 
8126                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8127                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8128 
8129                         rfs4_client_rele(cp_confirmed);
8130                 }
8131 
8132                 /*
8133                  * Confirmed, creds match, and verifier matches; must
8134                  * be an update of the callback info
8135                  */
8136                 if (cp_confirmed->rc_nfs_client.verifier ==
8137                     args->client.verifier) {
8138                         /* Setup callback information */
8139                         rfs4_client_setcb(cp_confirmed, &args->callback,
8140                             args->callback_ident);
8141 
8142                         /* everything okay -- move ahead */
8143                         *cs->statusp = res->status = NFS4_OK;
8144                         res->SETCLIENTID4res_u.resok4.clientid =
8145                             cp_confirmed->rc_clientid;
8146 
8147                         /* update the confirm_verifier and return it */
8148                         rfs4_client_scv_next(cp_confirmed);
8149                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8150                             cp_confirmed->rc_confirm_verf;
8151 
8152                         rfs4_client_rele(cp_confirmed);
8153                         goto out;
8154                 }
8155 
8156                 /*
8157                  * Creds match but the verifier doesn't.  Must search
8158                  * for an unconfirmed client that would be replaced by
8159                  * this request.
8160                  */
8161                 create = FALSE;
8162                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8163                     cp_confirmed);
8164         }
8165 
8166         /*
8167          * At this point, we have taken care of the brand new client
8168          * struct, INUSE case, update of an existing, and confirmed
8169          * client struct.
8170          */
8171 
8172         /*
8173          * check to see if things have changed while we originally
8174          * picked up the client struct.  If they have, then return and
8175          * retry the processing of this SETCLIENTID request.
8176          */
8177         if (cp_unconfirmed) {
8178                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8179                 if (!cp_unconfirmed->rc_need_confirm) {
8180                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8181                         rfs4_client_rele(cp_unconfirmed);
8182                         if (cp_confirmed)
8183                                 rfs4_client_rele(cp_confirmed);
8184                         goto retry;
8185                 }
8186                 /* do away with the old unconfirmed one */
8187                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8188                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8189                 rfs4_client_rele(cp_unconfirmed);
8190                 cp_unconfirmed = NULL;
8191         }
8192 
8193         /*
8194          * This search will temporarily hide the confirmed client
8195          * struct while a new client struct is created as the
8196          * unconfirmed one.
8197          */
8198         create = TRUE;
8199         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8200 
8201         ASSERT(newcp != NULL);
8202 
8203         if (newcp == NULL) {
8204                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8205                 rfs4_client_rele(cp_confirmed);
8206                 goto out;
8207         }
8208 
8209         /*
8210          * If one was not created, then a similar request must be in
8211          * process so release and start over with this one
8212          */
8213         if (create != TRUE) {
8214                 rfs4_client_rele(newcp);
8215                 if (cp_confirmed)
8216                         rfs4_client_rele(cp_confirmed);
8217                 goto retry;
8218         }
8219 
8220         *cs->statusp = res->status = NFS4_OK;
8221         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8222         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8223             newcp->rc_confirm_verf;
8224         /* Setup callback information; CB_NULL confirmation later */
8225         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8226 
8227         newcp->rc_cp_confirmed = cp_confirmed;
8228 
8229         rfs4_client_rele(newcp);
8230 
8231 out:
8232         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8233             SETCLIENTID4res *, res);
8234 }
8235 
8236 /*ARGSUSED*/
8237 void
8238 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8239     struct svc_req *req, struct compound_state *cs)
8240 {
8241         SETCLIENTID_CONFIRM4args *args =
8242             &argop->nfs_argop4_u.opsetclientid_confirm;
8243         SETCLIENTID_CONFIRM4res *res =
8244             &resop->nfs_resop4_u.opsetclientid_confirm;
8245         rfs4_client_t *cp, *cptoclose = NULL;
8246         nfs4_srv_t *nsrv4;
8247 
8248         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8249             struct compound_state *, cs,
8250             SETCLIENTID_CONFIRM4args *, args);
8251 
8252         nsrv4 = nfs4_get_srv();
8253         *cs->statusp = res->status = NFS4_OK;
8254 
8255         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8256 
8257         if (cp == NULL) {
8258                 *cs->statusp = res->status =
8259                     rfs4_check_clientid(&args->clientid, 1);
8260                 goto out;
8261         }
8262 
8263         if (!creds_ok(cp, req, cs)) {
8264                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8265                 rfs4_client_rele(cp);
8266                 goto out;
8267         }
8268 
8269         /* If the verifier doesn't match, the record doesn't match */
8270         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8271                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8272                 rfs4_client_rele(cp);
8273                 goto out;
8274         }
8275 
8276         rfs4_dbe_lock(cp->rc_dbe);
8277         cp->rc_need_confirm = FALSE;
8278         if (cp->rc_cp_confirmed) {
8279                 cptoclose = cp->rc_cp_confirmed;
8280                 cptoclose->rc_ss_remove = 1;
8281                 cp->rc_cp_confirmed = NULL;
8282         }
8283 
8284         /*
8285          * Update the client's associated server instance, if it's changed
8286          * since the client was created.
8287          */
8288         if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8289                 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8290 
8291         /*
8292          * Record clientid in stable storage.
8293          * Must be done after server instance has been assigned.
8294          */
8295         rfs4_ss_clid(nsrv4, cp);
8296 
8297         rfs4_dbe_unlock(cp->rc_dbe);
8298 
8299         if (cptoclose)
8300                 /* don't need to rele, client_close does it */
8301                 rfs4_client_close(cptoclose);
8302 
8303         /* If needed, initiate CB_NULL call for callback path */
8304         rfs4_deleg_cb_check(cp);
8305         rfs4_update_lease(cp);
8306 
8307         /*
8308          * Check to see if client can perform reclaims
8309          */
8310         rfs4_ss_chkclid(nsrv4, cp);
8311 
8312         rfs4_client_rele(cp);
8313 
8314 out:
8315         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8316             struct compound_state *, cs,
8317             SETCLIENTID_CONFIRM4 *, res);
8318 }
8319 
8320 
8321 /*ARGSUSED*/
8322 void
8323 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8324     struct svc_req *req, struct compound_state *cs)
8325 {
8326         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8327         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8328         rfs4_state_t *sp;
8329         nfsstat4 status;
8330 
8331         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8332             CLOSE4args *, args);
8333 
8334         if (cs->vp == NULL) {
8335                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8336                 goto out;
8337         }
8338 
8339         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8340         if (status != NFS4_OK) {
8341                 *cs->statusp = resp->status = status;
8342                 goto out;
8343         }
8344 
8345         /* Ensure specified filehandle matches */
8346         if (cs->vp != sp->rs_finfo->rf_vp) {
8347                 rfs4_state_rele(sp);
8348                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8349                 goto out;
8350         }
8351 
8352         /* hold off other access to open_owner while we tinker */
8353         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8354 
8355         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8356         case NFS4_CHECK_STATEID_OKAY:
8357                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8358                     resop) != NFS4_CHKSEQ_OKAY) {
8359                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8360                         goto end;
8361                 }
8362                 break;
8363         case NFS4_CHECK_STATEID_OLD:
8364                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8365                 goto end;
8366         case NFS4_CHECK_STATEID_BAD:
8367                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8368                 goto end;
8369         case NFS4_CHECK_STATEID_EXPIRED:
8370                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8371                 goto end;
8372         case NFS4_CHECK_STATEID_CLOSED:
8373                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8374                 goto end;
8375         case NFS4_CHECK_STATEID_UNCONFIRMED:
8376                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8377                 goto end;
8378         case NFS4_CHECK_STATEID_REPLAY:
8379                 /* Check the sequence id for the open owner */
8380                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8381                     resop)) {
8382                 case NFS4_CHKSEQ_OKAY:
8383                         /*
8384                          * This is replayed stateid; if seqid matches
8385                          * next expected, then client is using wrong seqid.
8386                          */
8387                         /* FALL THROUGH */
8388                 case NFS4_CHKSEQ_BAD:
8389                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8390                         goto end;
8391                 case NFS4_CHKSEQ_REPLAY:
8392                         /*
8393                          * Note this case is the duplicate case so
8394                          * resp->status is already set.
8395                          */
8396                         *cs->statusp = resp->status;
8397                         rfs4_update_lease(sp->rs_owner->ro_client);
8398                         goto end;
8399                 }
8400                 break;
8401         default:
8402                 ASSERT(FALSE);
8403                 break;
8404         }
8405 
8406         rfs4_dbe_lock(sp->rs_dbe);
8407 
8408         /* Update the stateid. */
8409         next_stateid(&sp->rs_stateid);
8410         resp->open_stateid = sp->rs_stateid.stateid;
8411 
8412         rfs4_dbe_unlock(sp->rs_dbe);
8413 
8414         rfs4_update_lease(sp->rs_owner->ro_client);
8415         rfs4_update_open_sequence(sp->rs_owner);
8416         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8417 
8418         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8419 
8420         *cs->statusp = resp->status = status;
8421 
8422 end:
8423         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8424         rfs4_state_rele(sp);
8425 out:
8426         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8427             CLOSE4res *, resp);
8428 }
8429 
8430 /*
8431  * Manage the counts on the file struct and close all file locks
8432  */
8433 /*ARGSUSED*/
8434 void
8435 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8436     bool_t close_of_client)
8437 {
8438         rfs4_file_t *fp = sp->rs_finfo;
8439         rfs4_lo_state_t *lsp;
8440         int fflags = 0;
8441 
8442         /*
8443          * If this call is part of the larger closing down of client
8444          * state then it is just easier to release all locks
8445          * associated with this client instead of going through each
8446          * individual file and cleaning locks there.
8447          */
8448         if (close_of_client) {
8449                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8450                     !list_is_empty(&sp->rs_lostatelist) &&
8451                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8452                         /* Is the PxFS kernel module loaded? */
8453                         if (lm_remove_file_locks != NULL) {
8454                                 int new_sysid;
8455 
8456                                 /* Encode the cluster nodeid in new sysid */
8457                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8458                                 lm_set_nlmid_flk(&new_sysid);
8459 
8460                                 /*
8461                                  * This PxFS routine removes file locks for a
8462                                  * client over all nodes of a cluster.
8463                                  */
8464                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8465                                     "lm_remove_file_locks(sysid=0x%x)\n",
8466                                     new_sysid));
8467                                 (*lm_remove_file_locks)(new_sysid);
8468                         } else {
8469                                 struct flock64 flk;
8470 
8471                                 /* Release all locks for this client */
8472                                 flk.l_type = F_UNLKSYS;
8473                                 flk.l_whence = 0;
8474                                 flk.l_start = 0;
8475                                 flk.l_len = 0;
8476                                 flk.l_sysid =
8477                                     sp->rs_owner->ro_client->rc_sysidt;
8478                                 flk.l_pid = 0;
8479                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8480                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8481                                     (u_offset_t)0, NULL, CRED(), NULL);
8482                         }
8483 
8484                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8485                 }
8486         }
8487 
8488         /*
8489          * Release all locks on this file by this lock owner or at
8490          * least mark the locks as having been released
8491          */
8492         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8493             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8494                 lsp->rls_locks_cleaned = TRUE;
8495 
8496                 /* Was this already taken care of above? */
8497                 if (!close_of_client &&
8498                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8499                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8500                             lsp->rls_locker->rl_pid,
8501                             lsp->rls_locker->rl_client->rc_sysidt);
8502         }
8503 
8504         /*
8505          * Release any shrlocks associated with this open state ID.
8506          * This must be done before the rfs4_state gets marked closed.
8507          */
8508         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8509                 (void) rfs4_unshare(sp);
8510 
8511         if (sp->rs_open_access) {
8512                 rfs4_dbe_lock(fp->rf_dbe);
8513 
8514                 /*
8515                  * Decrement the count for each access and deny bit that this
8516                  * state has contributed to the file.
8517                  * If the file counts go to zero
8518                  * clear the appropriate bit in the appropriate mask.
8519                  */
8520                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8521                         fp->rf_access_read--;
8522                         fflags |= FREAD;
8523                         if (fp->rf_access_read == 0)
8524                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8525                 }
8526                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8527                         fp->rf_access_write--;
8528                         fflags |= FWRITE;
8529                         if (fp->rf_access_write == 0)
8530                                 fp->rf_share_access &=
8531                                     ~OPEN4_SHARE_ACCESS_WRITE;
8532                 }
8533                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8534                         fp->rf_deny_read--;
8535                         if (fp->rf_deny_read == 0)
8536                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8537                 }
8538                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8539                         fp->rf_deny_write--;
8540                         if (fp->rf_deny_write == 0)
8541                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8542                 }
8543 
8544                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8545 
8546                 rfs4_dbe_unlock(fp->rf_dbe);
8547 
8548                 sp->rs_open_access = 0;
8549                 sp->rs_open_deny = 0;
8550         }
8551 }
8552 
8553 /*
8554  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8555  */
8556 static nfsstat4
8557 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8558 {
8559         rfs4_lockowner_t *lo;
8560         rfs4_client_t *cp;
8561         uint32_t len;
8562 
8563         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8564         if (lo != NULL) {
8565                 cp = lo->rl_client;
8566                 if (rfs4_lease_expired(cp)) {
8567                         rfs4_lockowner_rele(lo);
8568                         rfs4_dbe_hold(cp->rc_dbe);
8569                         rfs4_client_close(cp);
8570                         return (NFS4ERR_EXPIRED);
8571                 }
8572                 dp->owner.clientid = lo->rl_owner.clientid;
8573                 len = lo->rl_owner.owner_len;
8574                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8575                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8576                 dp->owner.owner_len = len;
8577                 rfs4_lockowner_rele(lo);
8578                 goto finish;
8579         }
8580 
8581         /*
8582          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8583          * of the client id contain the boot time for a NFS4 lock. So we
8584          * fabricate and identity by setting clientid to the sysid, and
8585          * the lock owner to the pid.
8586          */
8587         dp->owner.clientid = flk->l_sysid;
8588         len = sizeof (pid_t);
8589         dp->owner.owner_len = len;
8590         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8591         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8592 finish:
8593         dp->offset = flk->l_start;
8594         dp->length = flk->l_len;
8595 
8596         if (flk->l_type == F_RDLCK)
8597                 dp->locktype = READ_LT;
8598         else if (flk->l_type == F_WRLCK)
8599                 dp->locktype = WRITE_LT;
8600         else
8601                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8602 
8603         return (NFS4_OK);
8604 }
8605 
8606 /*
8607  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8608  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8609  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8610  * for that (obviously); they are sending the LOCK requests with some delays
8611  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8612  * locking and delay implementation at the client side.
8613  *
8614  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8615  * fast retries on its own (the for loop below) in a hope the lock will be
8616  * available soon.  And if not, the client won't need to resend the LOCK
8617  * requests so fast to check the lock availability.  This basically saves some
8618  * network traffic and tries to make sure the client gets the lock ASAP.
8619  */
8620 static int
8621 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8622 {
8623         int error;
8624         struct flock64 flk;
8625         int i;
8626         clock_t delaytime;
8627         int cmd;
8628         int spin_cnt = 0;
8629 
8630         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8631 retry:
8632         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8633 
8634         for (i = 0; i < rfs4_maxlock_tries; i++) {
8635                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8636                 error = VOP_FRLOCK(vp, cmd,
8637                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8638 
8639                 if (error != EAGAIN && error != EACCES)
8640                         break;
8641 
8642                 if (i < rfs4_maxlock_tries - 1) {
8643                         delay(delaytime);
8644                         delaytime *= 2;
8645                 }
8646         }
8647 
8648         if (error == EAGAIN || error == EACCES) {
8649                 /* Get the owner of the lock */
8650                 flk = *flock;
8651                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8652                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8653                     NULL) == 0) {
8654                         /*
8655                          * There's a race inherent in the current VOP_FRLOCK
8656                          * design where:
8657                          * a: "other guy" takes a lock that conflicts with a
8658                          * lock we want
8659                          * b: we attempt to take our lock (non-blocking) and
8660                          * the attempt fails.
8661                          * c: "other guy" releases the conflicting lock
8662                          * d: we ask what lock conflicts with the lock we want,
8663                          * getting F_UNLCK (no lock blocks us)
8664                          *
8665                          * If we retry the non-blocking lock attempt in this
8666                          * case (restart at step 'b') there's some possibility
8667                          * that many such attempts might fail.  However a test
8668                          * designed to actually provoke this race shows that
8669                          * the vast majority of cases require no retry, and
8670                          * only a few took as many as three retries.  Here's
8671                          * the test outcome:
8672                          *
8673                          *         number of retries    how many times we needed
8674                          *                              that many retries
8675                          *         0                    79461
8676                          *         1                      862
8677                          *         2                       49
8678                          *         3                        5
8679                          *
8680                          * Given those empirical results, we arbitrarily limit
8681                          * the retry count to ten.
8682                          *
8683                          * If we actually make to ten retries and give up,
8684                          * nothing catastrophic happens, but we're unable to
8685                          * return the information about the conflicting lock to
8686                          * the NFS client.  That's an acceptable trade off vs.
8687                          * letting this retry loop run forever.
8688                          */
8689                         if (flk.l_type == F_UNLCK) {
8690                                 if (spin_cnt++ < 10) {
8691                                         /* No longer locked, retry */
8692                                         goto retry;
8693                                 }
8694                         } else {
8695                                 *flock = flk;
8696                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8697                                     F_GETLK, &flk);
8698                         }
8699                 }
8700         }
8701 
8702         return (error);
8703 }
8704 
8705 /*ARGSUSED*/
8706 static nfsstat4
8707 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8708     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8709 {
8710         nfsstat4 status;
8711         rfs4_lockowner_t *lo = lsp->rls_locker;
8712         rfs4_state_t *sp = lsp->rls_state;
8713         struct flock64 flock;
8714         int16_t ltype;
8715         int flag;
8716         int error;
8717         sysid_t sysid;
8718         LOCK4res *lres;
8719         vnode_t *vp;
8720 
8721         if (rfs4_lease_expired(lo->rl_client)) {
8722                 return (NFS4ERR_EXPIRED);
8723         }
8724 
8725         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8726                 return (status);
8727 
8728         /* Check for zero length. To lock to end of file use all ones for V4 */
8729         if (length == 0)
8730                 return (NFS4ERR_INVAL);
8731         else if (length == (length4)(~0))
8732                 length = 0;             /* Posix to end of file  */
8733 
8734 retry:
8735         rfs4_dbe_lock(sp->rs_dbe);
8736         if (sp->rs_closed == TRUE) {
8737                 rfs4_dbe_unlock(sp->rs_dbe);
8738                 return (NFS4ERR_OLD_STATEID);
8739         }
8740 
8741         if (resop->resop != OP_LOCKU) {
8742                 switch (locktype) {
8743                 case READ_LT:
8744                 case READW_LT:
8745                         if ((sp->rs_share_access
8746                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8747                                 rfs4_dbe_unlock(sp->rs_dbe);
8748 
8749                                 return (NFS4ERR_OPENMODE);
8750                         }
8751                         ltype = F_RDLCK;
8752                         break;
8753                 case WRITE_LT:
8754                 case WRITEW_LT:
8755                         if ((sp->rs_share_access
8756                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8757                                 rfs4_dbe_unlock(sp->rs_dbe);
8758 
8759                                 return (NFS4ERR_OPENMODE);
8760                         }
8761                         ltype = F_WRLCK;
8762                         break;
8763                 }
8764         } else
8765                 ltype = F_UNLCK;
8766 
8767         flock.l_type = ltype;
8768         flock.l_whence = 0;             /* SEEK_SET */
8769         flock.l_start = offset;
8770         flock.l_len = length;
8771         flock.l_sysid = sysid;
8772         flock.l_pid = lsp->rls_locker->rl_pid;
8773 
8774         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8775         if (flock.l_len < 0 || flock.l_start < 0) {
8776                 rfs4_dbe_unlock(sp->rs_dbe);
8777                 return (NFS4ERR_INVAL);
8778         }
8779 
8780         /*
8781          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8782          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8783          */
8784         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8785 
8786         vp = sp->rs_finfo->rf_vp;
8787         VN_HOLD(vp);
8788 
8789         /*
8790          * We need to unlock sp before we call the underlying filesystem to
8791          * acquire the file lock.
8792          */
8793         rfs4_dbe_unlock(sp->rs_dbe);
8794 
8795         error = setlock(vp, &flock, flag, cred);
8796 
8797         /*
8798          * Make sure the file is still open.  In a case the file was closed in
8799          * the meantime, clean the lock we acquired using the setlock() call
8800          * above, and return the appropriate error.
8801          */
8802         rfs4_dbe_lock(sp->rs_dbe);
8803         if (sp->rs_closed == TRUE) {
8804                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8805                 rfs4_dbe_unlock(sp->rs_dbe);
8806 
8807                 VN_RELE(vp);
8808 
8809                 return (NFS4ERR_OLD_STATEID);
8810         }
8811         rfs4_dbe_unlock(sp->rs_dbe);
8812 
8813         VN_RELE(vp);
8814 
8815         if (error == 0) {
8816                 rfs4_dbe_lock(lsp->rls_dbe);
8817                 next_stateid(&lsp->rls_lockid);
8818                 rfs4_dbe_unlock(lsp->rls_dbe);
8819         }
8820 
8821         /*
8822          * N.B. We map error values to nfsv4 errors. This is differrent
8823          * than puterrno4 routine.
8824          */
8825         switch (error) {
8826         case 0:
8827                 status = NFS4_OK;
8828                 break;
8829         case EAGAIN:
8830         case EACCES:            /* Old value */
8831                 /* Can only get here if op is OP_LOCK */
8832                 ASSERT(resop->resop == OP_LOCK);
8833                 lres = &resop->nfs_resop4_u.oplock;
8834                 status = NFS4ERR_DENIED;
8835                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8836                     == NFS4ERR_EXPIRED)
8837                         goto retry;
8838                 break;
8839         case ENOLCK:
8840                 status = NFS4ERR_DELAY;
8841                 break;
8842         case EOVERFLOW:
8843                 status = NFS4ERR_INVAL;
8844                 break;
8845         case EINVAL:
8846                 status = NFS4ERR_NOTSUPP;
8847                 break;
8848         default:
8849                 status = NFS4ERR_SERVERFAULT;
8850                 break;
8851         }
8852 
8853         return (status);
8854 }
8855 
8856 /*ARGSUSED*/
8857 void
8858 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8859     struct svc_req *req, struct compound_state *cs)
8860 {
8861         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8862         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8863         nfsstat4 status;
8864         stateid4 *stateid;
8865         rfs4_lockowner_t *lo;
8866         rfs4_client_t *cp;
8867         rfs4_state_t *sp = NULL;
8868         rfs4_lo_state_t *lsp = NULL;
8869         bool_t ls_sw_held = FALSE;
8870         bool_t create = TRUE;
8871         bool_t lcreate = TRUE;
8872         bool_t dup_lock = FALSE;
8873         int rc;
8874 
8875         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8876             LOCK4args *, args);
8877 
8878         if (cs->vp == NULL) {
8879                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8880                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8881                     cs, LOCK4res *, resp);
8882                 return;
8883         }
8884 
8885         if (args->locker.new_lock_owner) {
8886                 /* Create a new lockowner for this instance */
8887                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8888 
8889                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8890 
8891                 stateid = &olo->open_stateid;
8892                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8893                 if (status != NFS4_OK) {
8894                         NFS4_DEBUG(rfs4_debug,
8895                             (CE_NOTE, "Get state failed in lock %d", status));
8896                         *cs->statusp = resp->status = status;
8897                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8898                             cs, LOCK4res *, resp);
8899                         return;
8900                 }
8901 
8902                 /* Ensure specified filehandle matches */
8903                 if (cs->vp != sp->rs_finfo->rf_vp) {
8904                         rfs4_state_rele(sp);
8905                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8906                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8907                             cs, LOCK4res *, resp);
8908                         return;
8909                 }
8910 
8911                 /* hold off other access to open_owner while we tinker */
8912                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8913 
8914                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8915                 case NFS4_CHECK_STATEID_OLD:
8916                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8917                         goto end;
8918                 case NFS4_CHECK_STATEID_BAD:
8919                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8920                         goto end;
8921                 case NFS4_CHECK_STATEID_EXPIRED:
8922                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8923                         goto end;
8924                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8925                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8926                         goto end;
8927                 case NFS4_CHECK_STATEID_CLOSED:
8928                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8929                         goto end;
8930                 case NFS4_CHECK_STATEID_OKAY:
8931                 case NFS4_CHECK_STATEID_REPLAY:
8932                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8933                             sp->rs_owner, resop)) {
8934                         case NFS4_CHKSEQ_OKAY:
8935                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8936                                         break;
8937                                 /*
8938                                  * This is replayed stateid; if seqid
8939                                  * matches next expected, then client
8940                                  * is using wrong seqid.
8941                                  */
8942                                 /* FALLTHROUGH */
8943                         case NFS4_CHKSEQ_BAD:
8944                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8945                                 goto end;
8946                         case NFS4_CHKSEQ_REPLAY:
8947                                 /* This is a duplicate LOCK request */
8948                                 dup_lock = TRUE;
8949 
8950                                 /*
8951                                  * For a duplicate we do not want to
8952                                  * create a new lockowner as it should
8953                                  * already exist.
8954                                  * Turn off the lockowner create flag.
8955                                  */
8956                                 lcreate = FALSE;
8957                         }
8958                         break;
8959                 }
8960 
8961                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8962                 if (lo == NULL) {
8963                         NFS4_DEBUG(rfs4_debug,
8964                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8965                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8966                         goto end;
8967                 }
8968 
8969                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8970                 if (lsp == NULL) {
8971                         rfs4_update_lease(sp->rs_owner->ro_client);
8972                         /*
8973                          * Only update theh open_seqid if this is not
8974                          * a duplicate request
8975                          */
8976                         if (dup_lock == FALSE) {
8977                                 rfs4_update_open_sequence(sp->rs_owner);
8978                         }
8979 
8980                         NFS4_DEBUG(rfs4_debug,
8981                             (CE_NOTE, "rfs4_op_lock: no state"));
8982                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8983                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8984                         rfs4_lockowner_rele(lo);
8985                         goto end;
8986                 }
8987 
8988                 /*
8989                  * This is the new_lock_owner branch and the client is
8990                  * supposed to be associating a new lock_owner with
8991                  * the open file at this point.  If we find that a
8992                  * lock_owner/state association already exists and a
8993                  * successful LOCK request was returned to the client,
8994                  * an error is returned to the client since this is
8995                  * not appropriate.  The client should be using the
8996                  * existing lock_owner branch.
8997                  */
8998                 if (dup_lock == FALSE && create == FALSE) {
8999                         if (lsp->rls_lock_completed == TRUE) {
9000                                 *cs->statusp =
9001                                     resp->status = NFS4ERR_BAD_SEQID;
9002                                 rfs4_lockowner_rele(lo);
9003                                 goto end;
9004                         }
9005                 }
9006 
9007                 rfs4_update_lease(sp->rs_owner->ro_client);
9008 
9009                 /*
9010                  * Only update theh open_seqid if this is not
9011                  * a duplicate request
9012                  */
9013                 if (dup_lock == FALSE) {
9014                         rfs4_update_open_sequence(sp->rs_owner);
9015                 }
9016 
9017                 /*
9018                  * If this is a duplicate lock request, just copy the
9019                  * previously saved reply and return.
9020                  */
9021                 if (dup_lock == TRUE) {
9022                         /* verify that lock_seqid's match */
9023                         if (lsp->rls_seqid != olo->lock_seqid) {
9024                                 NFS4_DEBUG(rfs4_debug,
9025                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9026                                     "lsp->seqid=%d old->seqid=%d",
9027                                     lsp->rls_seqid, olo->lock_seqid));
9028                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9029                         } else {
9030                                 rfs4_copy_reply(resop, &lsp->rls_reply);
9031                                 /*
9032                                  * Make sure to copy the just
9033                                  * retrieved reply status into the
9034                                  * overall compound status
9035                                  */
9036                                 *cs->statusp = resp->status;
9037                         }
9038                         rfs4_lockowner_rele(lo);
9039                         goto end;
9040                 }
9041 
9042                 rfs4_dbe_lock(lsp->rls_dbe);
9043 
9044                 /* Make sure to update the lock sequence id */
9045                 lsp->rls_seqid = olo->lock_seqid;
9046 
9047                 NFS4_DEBUG(rfs4_debug,
9048                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9049 
9050                 /*
9051                  * This is used to signify the newly created lockowner
9052                  * stateid and its sequence number.  The checks for
9053                  * sequence number and increment don't occur on the
9054                  * very first lock request for a lockowner.
9055                  */
9056                 lsp->rls_skip_seqid_check = TRUE;
9057 
9058                 /* hold off other access to lsp while we tinker */
9059                 rfs4_sw_enter(&lsp->rls_sw);
9060                 ls_sw_held = TRUE;
9061 
9062                 rfs4_dbe_unlock(lsp->rls_dbe);
9063 
9064                 rfs4_lockowner_rele(lo);
9065         } else {
9066                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9067                 /* get lsp and hold the lock on the underlying file struct */
9068                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9069                     != NFS4_OK) {
9070                         *cs->statusp = resp->status = status;
9071                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9072                             cs, LOCK4res *, resp);
9073                         return;
9074                 }
9075                 create = FALSE; /* We didn't create lsp */
9076 
9077                 /* Ensure specified filehandle matches */
9078                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9079                         rfs4_lo_state_rele(lsp, TRUE);
9080                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9081                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9082                             cs, LOCK4res *, resp);
9083                         return;
9084                 }
9085 
9086                 /* hold off other access to lsp while we tinker */
9087                 rfs4_sw_enter(&lsp->rls_sw);
9088                 ls_sw_held = TRUE;
9089 
9090                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9091                 /*
9092                  * The stateid looks like it was okay (expected to be
9093                  * the next one)
9094                  */
9095                 case NFS4_CHECK_STATEID_OKAY:
9096                         /*
9097                          * The sequence id is now checked.  Determine
9098                          * if this is a replay or if it is in the
9099                          * expected (next) sequence.  In the case of a
9100                          * replay, there are two replay conditions
9101                          * that may occur.  The first is the normal
9102                          * condition where a LOCK is done with a
9103                          * NFS4_OK response and the stateid is
9104                          * updated.  That case is handled below when
9105                          * the stateid is identified as a REPLAY.  The
9106                          * second is the case where an error is
9107                          * returned, like NFS4ERR_DENIED, and the
9108                          * sequence number is updated but the stateid
9109                          * is not updated.  This second case is dealt
9110                          * with here.  So it may seem odd that the
9111                          * stateid is okay but the sequence id is a
9112                          * replay but it is okay.
9113                          */
9114                         switch (rfs4_check_lock_seqid(
9115                             args->locker.locker4_u.lock_owner.lock_seqid,
9116                             lsp, resop)) {
9117                         case NFS4_CHKSEQ_REPLAY:
9118                                 if (resp->status != NFS4_OK) {
9119                                         /*
9120                                          * Here is our replay and need
9121                                          * to verify that the last
9122                                          * response was an error.
9123                                          */
9124                                         *cs->statusp = resp->status;
9125                                         goto end;
9126                                 }
9127                                 /*
9128                                  * This is done since the sequence id
9129                                  * looked like a replay but it didn't
9130                                  * pass our check so a BAD_SEQID is
9131                                  * returned as a result.
9132                                  */
9133                                 /*FALLTHROUGH*/
9134                         case NFS4_CHKSEQ_BAD:
9135                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9136                                 goto end;
9137                         case NFS4_CHKSEQ_OKAY:
9138                                 /* Everything looks okay move ahead */
9139                                 break;
9140                         }
9141                         break;
9142                 case NFS4_CHECK_STATEID_OLD:
9143                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9144                         goto end;
9145                 case NFS4_CHECK_STATEID_BAD:
9146                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9147                         goto end;
9148                 case NFS4_CHECK_STATEID_EXPIRED:
9149                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9150                         goto end;
9151                 case NFS4_CHECK_STATEID_CLOSED:
9152                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9153                         goto end;
9154                 case NFS4_CHECK_STATEID_REPLAY:
9155                         switch (rfs4_check_lock_seqid(
9156                             args->locker.locker4_u.lock_owner.lock_seqid,
9157                             lsp, resop)) {
9158                         case NFS4_CHKSEQ_OKAY:
9159                                 /*
9160                                  * This is a replayed stateid; if
9161                                  * seqid matches the next expected,
9162                                  * then client is using wrong seqid.
9163                                  */
9164                         case NFS4_CHKSEQ_BAD:
9165                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9166                                 goto end;
9167                         case NFS4_CHKSEQ_REPLAY:
9168                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9169                                 *cs->statusp = status = resp->status;
9170                                 goto end;
9171                         }
9172                         break;
9173                 default:
9174                         ASSERT(FALSE);
9175                         break;
9176                 }
9177 
9178                 rfs4_update_lock_sequence(lsp);
9179                 rfs4_update_lease(lsp->rls_locker->rl_client);
9180         }
9181 
9182         /*
9183          * NFS4 only allows locking on regular files, so
9184          * verify type of object.
9185          */
9186         if (cs->vp->v_type != VREG) {
9187                 if (cs->vp->v_type == VDIR)
9188                         status = NFS4ERR_ISDIR;
9189                 else
9190                         status = NFS4ERR_INVAL;
9191                 goto out;
9192         }
9193 
9194         cp = lsp->rls_state->rs_owner->ro_client;
9195 
9196         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9197                 status = NFS4ERR_GRACE;
9198                 goto out;
9199         }
9200 
9201         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9202                 status = NFS4ERR_NO_GRACE;
9203                 goto out;
9204         }
9205 
9206         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9207                 status = NFS4ERR_NO_GRACE;
9208                 goto out;
9209         }
9210 
9211         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9212                 cs->deleg = TRUE;
9213 
9214         status = rfs4_do_lock(lsp, args->locktype,
9215             args->offset, args->length, cs->cr, resop);
9216 
9217 out:
9218         lsp->rls_skip_seqid_check = FALSE;
9219 
9220         *cs->statusp = resp->status = status;
9221 
9222         if (status == NFS4_OK) {
9223                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9224                 lsp->rls_lock_completed = TRUE;
9225         }
9226         /*
9227          * Only update the "OPEN" response here if this was a new
9228          * lock_owner
9229          */
9230         if (sp)
9231                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9232 
9233         rfs4_update_lock_resp(lsp, resop);
9234 
9235 end:
9236         if (lsp) {
9237                 if (ls_sw_held)
9238                         rfs4_sw_exit(&lsp->rls_sw);
9239                 /*
9240                  * If an sp obtained, then the lsp does not represent
9241                  * a lock on the file struct.
9242                  */
9243                 if (sp != NULL)
9244                         rfs4_lo_state_rele(lsp, FALSE);
9245                 else
9246                         rfs4_lo_state_rele(lsp, TRUE);
9247         }
9248         if (sp) {
9249                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9250                 rfs4_state_rele(sp);
9251         }
9252 
9253         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9254             LOCK4res *, resp);
9255 }
9256 
9257 /* free function for LOCK/LOCKT */
9258 static void
9259 lock_denied_free(nfs_resop4 *resop)
9260 {
9261         LOCK4denied *dp = NULL;
9262 
9263         switch (resop->resop) {
9264         case OP_LOCK:
9265                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9266                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9267                 break;
9268         case OP_LOCKT:
9269                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9270                         dp = &resop->nfs_resop4_u.oplockt.denied;
9271                 break;
9272         default:
9273                 break;
9274         }
9275 
9276         if (dp)
9277                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9278 }
9279 
9280 /*ARGSUSED*/
9281 void
9282 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9283     struct svc_req *req, struct compound_state *cs)
9284 {
9285         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9286         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9287         nfsstat4 status;
9288         stateid4 *stateid = &args->lock_stateid;
9289         rfs4_lo_state_t *lsp;
9290 
9291         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9292             LOCKU4args *, args);
9293 
9294         if (cs->vp == NULL) {
9295                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9296                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9297                     LOCKU4res *, resp);
9298                 return;
9299         }
9300 
9301         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9302                 *cs->statusp = resp->status = status;
9303                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9304                     LOCKU4res *, resp);
9305                 return;
9306         }
9307 
9308         /* Ensure specified filehandle matches */
9309         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9310                 rfs4_lo_state_rele(lsp, TRUE);
9311                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9312                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9313                     LOCKU4res *, resp);
9314                 return;
9315         }
9316 
9317         /* hold off other access to lsp while we tinker */
9318         rfs4_sw_enter(&lsp->rls_sw);
9319 
9320         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9321         case NFS4_CHECK_STATEID_OKAY:
9322                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9323                     != NFS4_CHKSEQ_OKAY) {
9324                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9325                         goto end;
9326                 }
9327                 break;
9328         case NFS4_CHECK_STATEID_OLD:
9329                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9330                 goto end;
9331         case NFS4_CHECK_STATEID_BAD:
9332                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9333                 goto end;
9334         case NFS4_CHECK_STATEID_EXPIRED:
9335                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9336                 goto end;
9337         case NFS4_CHECK_STATEID_CLOSED:
9338                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9339                 goto end;
9340         case NFS4_CHECK_STATEID_REPLAY:
9341                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9342                 case NFS4_CHKSEQ_OKAY:
9343                                 /*
9344                                  * This is a replayed stateid; if
9345                                  * seqid matches the next expected,
9346                                  * then client is using wrong seqid.
9347                                  */
9348                 case NFS4_CHKSEQ_BAD:
9349                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9350                         goto end;
9351                 case NFS4_CHKSEQ_REPLAY:
9352                         rfs4_update_lease(lsp->rls_locker->rl_client);
9353                         *cs->statusp = status = resp->status;
9354                         goto end;
9355                 }
9356                 break;
9357         default:
9358                 ASSERT(FALSE);
9359                 break;
9360         }
9361 
9362         rfs4_update_lock_sequence(lsp);
9363         rfs4_update_lease(lsp->rls_locker->rl_client);
9364 
9365         /*
9366          * NFS4 only allows locking on regular files, so
9367          * verify type of object.
9368          */
9369         if (cs->vp->v_type != VREG) {
9370                 if (cs->vp->v_type == VDIR)
9371                         status = NFS4ERR_ISDIR;
9372                 else
9373                         status = NFS4ERR_INVAL;
9374                 goto out;
9375         }
9376 
9377         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9378                 status = NFS4ERR_GRACE;
9379                 goto out;
9380         }
9381 
9382         status = rfs4_do_lock(lsp, args->locktype,
9383             args->offset, args->length, cs->cr, resop);
9384 
9385 out:
9386         *cs->statusp = resp->status = status;
9387 
9388         if (status == NFS4_OK)
9389                 resp->lock_stateid = lsp->rls_lockid.stateid;
9390 
9391         rfs4_update_lock_resp(lsp, resop);
9392 
9393 end:
9394         rfs4_sw_exit(&lsp->rls_sw);
9395         rfs4_lo_state_rele(lsp, TRUE);
9396 
9397         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9398             LOCKU4res *, resp);
9399 }
9400 
9401 /*
9402  * LOCKT is a best effort routine, the client can not be guaranteed that
9403  * the status return is still in effect by the time the reply is received.
9404  * They are numerous race conditions in this routine, but we are not required
9405  * and can not be accurate.
9406  */
9407 /*ARGSUSED*/
9408 void
9409 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9410     struct svc_req *req, struct compound_state *cs)
9411 {
9412         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9413         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9414         rfs4_lockowner_t *lo;
9415         rfs4_client_t *cp;
9416         bool_t create = FALSE;
9417         struct flock64 flk;
9418         int error;
9419         int flag = FREAD | FWRITE;
9420         int ltype;
9421         length4 posix_length;
9422         sysid_t sysid;
9423         pid_t pid;
9424 
9425         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9426             LOCKT4args *, args);
9427 
9428         if (cs->vp == NULL) {
9429                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9430                 goto out;
9431         }
9432 
9433         /*
9434          * NFS4 only allows locking on regular files, so
9435          * verify type of object.
9436          */
9437         if (cs->vp->v_type != VREG) {
9438                 if (cs->vp->v_type == VDIR)
9439                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9440                 else
9441                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9442                 goto out;
9443         }
9444 
9445         /*
9446          * Check out the clientid to ensure the server knows about it
9447          * so that we correctly inform the client of a server reboot.
9448          */
9449         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9450             == NULL) {
9451                 *cs->statusp = resp->status =
9452                     rfs4_check_clientid(&args->owner.clientid, 0);
9453                 goto out;
9454         }
9455         if (rfs4_lease_expired(cp)) {
9456                 rfs4_client_close(cp);
9457                 /*
9458                  * Protocol doesn't allow returning NFS4ERR_STALE as
9459                  * other operations do on this check so STALE_CLIENTID
9460                  * is returned instead
9461                  */
9462                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9463                 goto out;
9464         }
9465 
9466         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9467                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9468                 rfs4_client_rele(cp);
9469                 goto out;
9470         }
9471         rfs4_client_rele(cp);
9472 
9473         resp->status = NFS4_OK;
9474 
9475         switch (args->locktype) {
9476         case READ_LT:
9477         case READW_LT:
9478                 ltype = F_RDLCK;
9479                 break;
9480         case WRITE_LT:
9481         case WRITEW_LT:
9482                 ltype = F_WRLCK;
9483                 break;
9484         }
9485 
9486         posix_length = args->length;
9487         /* Check for zero length. To lock to end of file use all ones for V4 */
9488         if (posix_length == 0) {
9489                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9490                 goto out;
9491         } else if (posix_length == (length4)(~0)) {
9492                 posix_length = 0;       /* Posix to end of file  */
9493         }
9494 
9495         /* Find or create a lockowner */
9496         lo = rfs4_findlockowner(&args->owner, &create);
9497 
9498         if (lo) {
9499                 pid = lo->rl_pid;
9500                 if ((resp->status =
9501                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9502                         goto err;
9503         } else {
9504                 pid = 0;
9505                 sysid = lockt_sysid;
9506         }
9507 retry:
9508         flk.l_type = ltype;
9509         flk.l_whence = 0;               /* SEEK_SET */
9510         flk.l_start = args->offset;
9511         flk.l_len = posix_length;
9512         flk.l_sysid = sysid;
9513         flk.l_pid = pid;
9514         flag |= F_REMOTELOCK;
9515 
9516         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9517 
9518         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9519         if (flk.l_len < 0 || flk.l_start < 0) {
9520                 resp->status = NFS4ERR_INVAL;
9521                 goto err;
9522         }
9523         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9524             NULL, cs->cr, NULL);
9525 
9526         /*
9527          * N.B. We map error values to nfsv4 errors. This is differrent
9528          * than puterrno4 routine.
9529          */
9530         switch (error) {
9531         case 0:
9532                 if (flk.l_type == F_UNLCK)
9533                         resp->status = NFS4_OK;
9534                 else {
9535                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9536                                 goto retry;
9537                         resp->status = NFS4ERR_DENIED;
9538                 }
9539                 break;
9540         case EOVERFLOW:
9541                 resp->status = NFS4ERR_INVAL;
9542                 break;
9543         case EINVAL:
9544                 resp->status = NFS4ERR_NOTSUPP;
9545                 break;
9546         default:
9547                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9548                     error);
9549                 resp->status = NFS4ERR_SERVERFAULT;
9550                 break;
9551         }
9552 
9553 err:
9554         if (lo)
9555                 rfs4_lockowner_rele(lo);
9556         *cs->statusp = resp->status;
9557 out:
9558         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9559             LOCKT4res *, resp);
9560 }
9561 
9562 int
9563 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9564 {
9565         int err;
9566         int cmd;
9567         vnode_t *vp;
9568         struct shrlock shr;
9569         struct shr_locowner shr_loco;
9570         int fflags = 0;
9571 
9572         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9573         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9574 
9575         if (sp->rs_closed)
9576                 return (NFS4ERR_OLD_STATEID);
9577 
9578         vp = sp->rs_finfo->rf_vp;
9579         ASSERT(vp);
9580 
9581         shr.s_access = shr.s_deny = 0;
9582 
9583         if (access & OPEN4_SHARE_ACCESS_READ) {
9584                 fflags |= FREAD;
9585                 shr.s_access |= F_RDACC;
9586         }
9587         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9588                 fflags |= FWRITE;
9589                 shr.s_access |= F_WRACC;
9590         }
9591         ASSERT(shr.s_access);
9592 
9593         if (deny & OPEN4_SHARE_DENY_READ)
9594                 shr.s_deny |= F_RDDNY;
9595         if (deny & OPEN4_SHARE_DENY_WRITE)
9596                 shr.s_deny |= F_WRDNY;
9597 
9598         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9599         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9600         shr_loco.sl_pid = shr.s_pid;
9601         shr_loco.sl_id = shr.s_sysid;
9602         shr.s_owner = (caddr_t)&shr_loco;
9603         shr.s_own_len = sizeof (shr_loco);
9604 
9605         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9606 
9607         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9608         if (err != 0) {
9609                 if (err == EAGAIN)
9610                         err = NFS4ERR_SHARE_DENIED;
9611                 else
9612                         err = puterrno4(err);
9613                 return (err);
9614         }
9615 
9616         sp->rs_share_access |= access;
9617         sp->rs_share_deny |= deny;
9618 
9619         return (0);
9620 }
9621 
9622 int
9623 rfs4_unshare(rfs4_state_t *sp)
9624 {
9625         int err;
9626         struct shrlock shr;
9627         struct shr_locowner shr_loco;
9628 
9629         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9630 
9631         if (sp->rs_closed || sp->rs_share_access == 0)
9632                 return (0);
9633 
9634         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9635         ASSERT(sp->rs_finfo->rf_vp);
9636 
9637         shr.s_access = shr.s_deny = 0;
9638         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9639         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9640         shr_loco.sl_pid = shr.s_pid;
9641         shr_loco.sl_id = shr.s_sysid;
9642         shr.s_owner = (caddr_t)&shr_loco;
9643         shr.s_own_len = sizeof (shr_loco);
9644 
9645         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9646             NULL);
9647         if (err != 0) {
9648                 err = puterrno4(err);
9649                 return (err);
9650         }
9651 
9652         sp->rs_share_access = 0;
9653         sp->rs_share_deny = 0;
9654 
9655         return (0);
9656 
9657 }
9658 
9659 static int
9660 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9661 {
9662         struct clist    *wcl;
9663         count4          count = rok->data_len;
9664         int             wlist_len;
9665 
9666         wcl = args->wlist;
9667         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9668                 return (FALSE);
9669         }
9670         wcl = args->wlist;
9671         rok->wlist_len = wlist_len;
9672         rok->wlist = wcl;
9673         return (TRUE);
9674 }
9675 
/*
 * Tunable to disable server referrals: while it is non-zero,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9678 
9679 /*
9680  * Find an NFS record in reparse point data.
9681  * Returns 0 for success and <0 or an errno value on failure.
9682  */
9683 int
9684 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9685 {
9686         int err;
9687         char *stype, *val;
9688         nvlist_t *nvl;
9689         nvpair_t *curr;
9690 
9691         if ((nvl = reparse_init()) == NULL)
9692                 return (-1);
9693 
9694         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9695                 reparse_free(nvl);
9696                 return (err);
9697         }
9698 
9699         curr = NULL;
9700         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9701                 if ((stype = nvpair_name(curr)) == NULL) {
9702                         reparse_free(nvl);
9703                         return (-2);
9704                 }
9705                 if (strncasecmp(stype, "NFS", 3) == 0)
9706                         break;
9707         }
9708 
9709         if ((curr == NULL) ||
9710             (nvpair_value_string(curr, &val))) {
9711                 reparse_free(nvl);
9712                 return (-3);
9713         }
9714         *nvlp = nvl;
9715         *svcp = stype;
9716         *datap = val;
9717         return (0);
9718 }
9719 
9720 int
9721 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9722 {
9723         nvlist_t *nvl;
9724         char *s, *d;
9725 
9726         if (rfs4_no_referrals != 0)
9727                 return (B_FALSE);
9728 
9729         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9730                 return (B_FALSE);
9731 
9732         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9733                 return (B_FALSE);
9734 
9735         reparse_free(nvl);
9736 
9737         return (B_TRUE);
9738 }
9739 
9740 /*
9741  * There is a user-level copy of this routine in ref_subr.c.
9742  * Changes should be kept in sync.
9743  */
9744 static int
9745 nfs4_create_components(char *path, component4 *comp4)
9746 {
9747         int slen, plen, ncomp;
9748         char *ori_path, *nxtc, buf[MAXNAMELEN];
9749 
9750         if (path == NULL)
9751                 return (0);
9752 
9753         plen = strlen(path) + 1;        /* include the terminator */
9754         ori_path = path;
9755         ncomp = 0;
9756 
9757         /* count number of components in the path */
9758         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9759                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9760                         if ((slen = nxtc - path) == 0) {
9761                                 path = nxtc + 1;
9762                                 continue;
9763                         }
9764 
9765                         if (comp4 != NULL) {
9766                                 bcopy(path, buf, slen);
9767                                 buf[slen] = '\0';
9768                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9769                         }
9770 
9771                         ncomp++;        /* 1 valid component */
9772                         path = nxtc + 1;
9773                 }
9774                 if (*nxtc == '\0' || *nxtc == '\n')
9775                         break;
9776         }
9777 
9778         return (ncomp);
9779 }
9780 
9781 /*
9782  * There is a user-level copy of this routine in ref_subr.c.
9783  * Changes should be kept in sync.
9784  */
9785 static int
9786 make_pathname4(char *path, pathname4 *pathname)
9787 {
9788         int ncomp;
9789         component4 *comp4;
9790 
9791         if (pathname == NULL)
9792                 return (0);
9793 
9794         if (path == NULL) {
9795                 pathname->pathname4_val = NULL;
9796                 pathname->pathname4_len = 0;
9797                 return (0);
9798         }
9799 
9800         /* count number of components to alloc buffer */
9801         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9802                 pathname->pathname4_val = NULL;
9803                 pathname->pathname4_len = 0;
9804                 return (0);
9805         }
9806         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9807 
9808         /* copy components into allocated buffer */
9809         ncomp = nfs4_create_components(path, comp4);
9810 
9811         pathname->pathname4_val = comp4;
9812         pathname->pathname4_len = ncomp;
9813 
9814         return (ncomp);
9815 }
9816 
9817 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9818 
9819 fs_locations4 *
9820 fetch_referral(vnode_t *vp, cred_t *cr)
9821 {
9822         nvlist_t *nvl;
9823         char *stype, *sdata;
9824         fs_locations4 *result;
9825         char buf[1024];
9826         size_t bufsize;
9827         XDR xdr;
9828         int err;
9829 
9830         /*
9831          * Check attrs to ensure it's a reparse point
9832          */
9833         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9834                 return (NULL);
9835 
9836         /*
9837          * Look for an NFS record and get the type and data
9838          */
9839         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9840                 return (NULL);
9841 
9842         /*
9843          * With the type and data, upcall to get the referral
9844          */
9845         bufsize = sizeof (buf);
9846         bzero(buf, sizeof (buf));
9847         err = reparse_kderef((const char *)stype, (const char *)sdata,
9848             buf, &bufsize);
9849         reparse_free(nvl);
9850 
9851         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9852             char *, stype, char *, sdata, char *, buf, int, err);
9853         if (err) {
9854                 cmn_err(CE_NOTE,
9855                     "reparsed daemon not running: unable to get referral (%d)",
9856                     err);
9857                 return (NULL);
9858         }
9859 
9860         /*
9861          * We get an XDR'ed record back from the kderef call
9862          */
9863         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9864         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9865         err = xdr_fs_locations4(&xdr, result);
9866         XDR_DESTROY(&xdr);
9867         if (err != TRUE) {
9868                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9869                     int, err);
9870                 return (NULL);
9871         }
9872 
9873         /*
9874          * Look at path to recover fs_root, ignoring the leading '/'
9875          */
9876         (void) make_pathname4(vp->v_path, &result->fs_root);
9877 
9878         return (result);
9879 }
9880 
9881 char *
9882 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9883 {
9884         fs_locations4 *fsl;
9885         fs_location4 *fs;
9886         char *server, *path, *symbuf;
9887         static char *prefix = "/net/";
9888         int i, size, npaths;
9889         uint_t len;
9890 
9891         /* Get the referral */
9892         if ((fsl = fetch_referral(vp, cr)) == NULL)
9893                 return (NULL);
9894 
9895         /* Deal with only the first location and first server */
9896         fs = &fsl->locations_val[0];
9897         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9898         if (server == NULL) {
9899                 rfs4_free_fs_locations4(fsl);
9900                 kmem_free(fsl, sizeof (fs_locations4));
9901                 return (NULL);
9902         }
9903 
9904         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9905         size = strlen(prefix) + len;
9906         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9907                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9908 
9909         /* Allocate the symlink buffer and fill it */
9910         symbuf = kmem_zalloc(size, KM_SLEEP);
9911         (void) strcat(symbuf, prefix);
9912         (void) strcat(symbuf, server);
9913         kmem_free(server, len);
9914 
9915         npaths = 0;
9916         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9917                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9918                 if (path == NULL)
9919                         continue;
9920                 (void) strcat(symbuf, "/");
9921                 (void) strcat(symbuf, path);
9922                 npaths++;
9923                 kmem_free(path, len);
9924         }
9925 
9926         rfs4_free_fs_locations4(fsl);
9927         kmem_free(fsl, sizeof (fs_locations4));
9928 
9929         if (strsz != NULL)
9930                 *strsz = size;
9931         return (symbuf);
9932 }
9933 
9934 /*
9935  * Check to see if we have a downrev Solaris client, so that we
9936  * can send it a symlink instead of a referral.
9937  */
9938 int
9939 client_is_downrev(struct svc_req *req)
9940 {
9941         struct sockaddr *ca;
9942         rfs4_clntip_t *ci;
9943         bool_t create = FALSE;
9944         int is_downrev;
9945 
9946         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9947         ASSERT(ca);
9948         ci = rfs4_find_clntip(ca, &create);
9949         if (ci == NULL)
9950                 return (0);
9951         is_downrev = ci->ri_no_referrals;
9952         rfs4_dbe_rele(ci->ri_dbe);
9953         return (is_downrev);
9954 }
9955 
9956 /*
9957  * Do the main work of handling HA-NFSv4 Resource Group failover on
9958  * Sun Cluster.
9959  * We need to detect whether any RG admin paths have been added or removed,
9960  * and adjust resources accordingly.
9961  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9962  * order to scale, the list and array of paths need to be held in more
9963  * suitable data structures.
9964  */
9965 static void
9966 hanfsv4_failover(nfs4_srv_t *nsrv4)
9967 {
9968         int i, start_grace, numadded_paths = 0;
9969         char **added_paths = NULL;
9970         rfs4_dss_path_t *dss_path;
9971 
9972         /*
9973          * Note: currently, dss_pathlist cannot be NULL, since
9974          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9975          * make the latter dynamically specified too, the following will
9976          * need to be adjusted.
9977          */
9978 
9979         /*
9980          * First, look for removed paths: RGs that have been failed-over
9981          * away from this node.
9982          * Walk the "currently-serving" dss_pathlist and, for each
9983          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9984          * from nfsd. If not, that RG path has been removed.
9985          *
9986          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9987          * any duplicates.
9988          */
9989         dss_path = nsrv4->dss_pathlist;
9990         do {
9991                 int found = 0;
9992                 char *path = dss_path->path;
9993 
9994                 /* used only for non-HA so may not be removed */
9995                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9996                         dss_path = dss_path->next;
9997                         continue;
9998                 }
9999 
10000                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10001                         int cmpret;
10002                         char *newpath = rfs4_dss_newpaths[i];
10003 
10004                         /*
10005                          * Since nfsd has sorted rfs4_dss_newpaths for us,
10006                          * once the return from strcmp is negative we know
10007                          * we've passed the point where "path" should be,
10008                          * and can stop searching: "path" has been removed.
10009                          */
10010                         cmpret = strcmp(path, newpath);
10011                         if (cmpret < 0)
10012                                 break;
10013                         if (cmpret == 0) {
10014                                 found = 1;
10015                                 break;
10016                         }
10017                 }
10018 
10019                 if (found == 0) {
10020                         unsigned index = dss_path->index;
10021                         rfs4_servinst_t *sip = dss_path->sip;
10022                         rfs4_dss_path_t *path_next = dss_path->next;
10023 
10024                         /*
10025                          * This path has been removed.
10026                          * We must clear out the servinst reference to
10027                          * it, since it's now owned by another
10028                          * node: we should not attempt to touch it.
10029                          */
10030                         ASSERT(dss_path == sip->dss_paths[index]);
10031                         sip->dss_paths[index] = NULL;
10032 
10033                         /* remove from "currently-serving" list, and destroy */
10034                         remque(dss_path);
10035                         /* allow for NUL */
10036                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10037                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10038 
10039                         dss_path = path_next;
10040                 } else {
10041                         /* path was found; not removed */
10042                         dss_path = dss_path->next;
10043                 }
10044         } while (dss_path != nsrv4->dss_pathlist);
10045 
10046         /*
10047          * Now, look for added paths: RGs that have been failed-over
10048          * to this node.
10049          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10050          * for each path, check if it is on the "currently-serving"
10051          * dss_pathlist. If not, that RG path has been added.
10052          *
10053          * Note: we don't do duplicate detection here; nfsd does that for us.
10054          *
10055          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10056          * an upper bound for the size needed for added_paths[numadded_paths].
10057          */
10058 
10059         /* probably more space than we need, but guaranteed to be enough */
10060         if (rfs4_dss_numnewpaths > 0) {
10061                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10062                 added_paths = kmem_zalloc(sz, KM_SLEEP);
10063         }
10064 
10065         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10066         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10067                 int found = 0;
10068                 char *newpath = rfs4_dss_newpaths[i];
10069 
10070                 dss_path = nsrv4->dss_pathlist;
10071                 do {
10072                         char *path = dss_path->path;
10073 
10074                         /* used only for non-HA */
10075                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10076                                 dss_path = dss_path->next;
10077                                 continue;
10078                         }
10079 
10080                         if (strncmp(path, newpath, strlen(path)) == 0) {
10081                                 found = 1;
10082                                 break;
10083                         }
10084 
10085                         dss_path = dss_path->next;
10086                 } while (dss_path != nsrv4->dss_pathlist);
10087 
10088                 if (found == 0) {
10089                         added_paths[numadded_paths] = newpath;
10090                         numadded_paths++;
10091                 }
10092         }
10093 
10094         /* did we find any added paths? */
10095         if (numadded_paths > 0) {
10096 
10097                 /* create a new server instance, and start its grace period */
10098                 start_grace = 1;
10099                 /* CSTYLED */
10100                 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10101 
10102                 /* read in the stable storage state from these paths */
10103                 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10104 
10105                 /*
10106                  * Multiple failovers during a grace period will cause
10107                  * clients of the same resource group to be partitioned
10108                  * into different server instances, with different
10109                  * grace periods.  Since clients of the same resource
10110                  * group must be subject to the same grace period,
10111                  * we need to reset all currently active grace periods.
10112                  */
10113                 rfs4_grace_reset_all(nsrv4);
10114         }
10115 
10116         if (rfs4_dss_numnewpaths > 0)
10117                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10118 }