1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30 
  31 /*
  32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33  * Copyright 2019 Nexenta Systems, Inc.
  34  * Copyright 2019 Nexenta by DDN, Inc.
  35  */
  36 
  37 #include <sys/param.h>
  38 #include <sys/types.h>
  39 #include <sys/systm.h>
  40 #include <sys/cred.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 #include <sys/vnode.h>
  45 #include <sys/uio.h>
  46 #include <sys/errno.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/statvfs.h>
  49 #include <sys/kmem.h>
  50 #include <sys/dirent.h>
  51 #include <sys/cmn_err.h>
  52 #include <sys/debug.h>
  53 #include <sys/systeminfo.h>
  54 #include <sys/flock.h>
  55 #include <sys/pathname.h>
  56 #include <sys/nbmlock.h>
  57 #include <sys/share.h>
  58 #include <sys/atomic.h>
  59 #include <sys/policy.h>
  60 #include <sys/fem.h>
  61 #include <sys/sdt.h>
  62 #include <sys/ddi.h>
  63 #include <sys/zone.h>
  64 
  65 #include <fs/fs_reparse.h>
  66 
  67 #include <rpc/types.h>
  68 #include <rpc/auth.h>
  69 #include <rpc/rpcsec_gss.h>
  70 #include <rpc/svc.h>
  71 
  72 #include <nfs/nfs.h>
  73 #include <nfs/nfssys.h>
  74 #include <nfs/export.h>
  75 #include <nfs/nfs_cmd.h>
  76 #include <nfs/lm.h>
  77 #include <nfs/nfs4.h>
  78 #include <nfs/nfs4_drc.h>
  79 
  80 #include <sys/strsubr.h>
  81 #include <sys/strsun.h>
  82 
  83 #include <inet/common.h>
  84 #include <inet/ip.h>
  85 #include <inet/ip6.h>
  86 
  87 #include <sys/tsol/label.h>
  88 #include <sys/tsol/tndb.h>
  89 
/*
 * Tunables
 *
 * rfs4_maxlock_tries and rfs4_lock_delay are file-local variables seeded
 * from the compile-time defaults below.  rdma_svc_ops and
 * nfs_loaned_buffers are defined elsewhere and only referenced here.
 */
#define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define RFS4_LOCK_DELAY 10      /* Milliseconds */
static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */
  97 
  98 static int rdma_setup_read_data4(READ4args *, READ4res *);
  99 
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * The macro pre-increments (sp)->bits.chgseq in place and evaluates to the
 * new (incremented) value, so it has a side effect on *sp every time it is
 * expanded.
 */
#define next_stateid(sp) (++(sp)->bits.chgseq)
 104 
 105 /*
 106  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  *      maxcount that isn't large enough to hold the smallest possible
 109  *      XDR encoded dirent.
 110  *
 111  *          sizeof cookie (8 bytes) +
 112  *          sizeof name_len (4 bytes) +
 113  *          sizeof smallest (padded) name (4 bytes) +
 114  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  *          sizeof attrlist4_len (4 bytes) +
 116  *          sizeof next boolean (4 bytes)
 117  *
 118  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  * the smallest possible entry4 (assumes no attrs requested).
 120  *      sizeof nfsstat4 (4 bytes) +
 121  *      sizeof verifier4 (8 bytes) +
 122  *      sizeof entry4list bool (4 bytes) +
 123  *      sizeof entry4   (36 bytes) +
 124  *      sizeof eof bool  (4 bytes)
 125  *
 126  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  *      required for a given name length.  MAXNAMELEN is the maximum
 130  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  *      to hold ., .., and the largest possible solaris dirent64.
 134  */
 135 #define RFS4_MINLEN_ENTRY4 36
 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137 #define RFS4_MINLEN_RDDIR_BUF \
 138         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139 
 140 /*
 141  * It would be better to pad to 4 bytes since that's what XDR would do,
 142  * but the dirents UFS gives us are already padded to 8, so just take
 143  * what we're given.  Dircount is only a hint anyway.  Currently the
 144  * solaris kernel is ASCII only, so there's no point in calling the
 145  * UTF8 functions.
 146  *
 * dirent64: name padded to provide 8 byte struct alignment
 148  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  *
 150  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  *
 152  */
 153 #define DIRENT64_TO_DIRCOUNT(dp) \
 154         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155 
/* Zone key created by zone_key_create() in rfs4_srvrinit(). */
zone_key_t      rfs4_zone_key;

/* Allocated by lm_alloc_sysidt() in rfs4_srvrinit(). */
static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */

/* Set from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t    nfs4_srv_caller_id;
/* Key filled in by vsd_create() in rfs4_srvrinit(). */
uint_t          nfs4_srv_vkey = 0;
 162 
 163 void    rfs4_init_compound_state(struct compound_state *);
 164 
 165 static void     nullfree(caddr_t);
 166 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 167                     struct compound_state *);
 168 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 169                     struct compound_state *);
 170 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 171                     struct compound_state *);
 172 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 173                     struct compound_state *);
 174 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 175                     struct compound_state *);
 176 static void     rfs4_op_create_free(nfs_resop4 *resop);
 177 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 178                     struct svc_req *, struct compound_state *);
 179 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 180                     struct svc_req *, struct compound_state *);
 181 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 182                     struct compound_state *);
 183 static void     rfs4_op_getattr_free(nfs_resop4 *);
 184 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185                     struct compound_state *);
 186 static void     rfs4_op_getfh_free(nfs_resop4 *);
 187 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 188                     struct compound_state *);
 189 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 190                     struct compound_state *);
 191 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192                     struct compound_state *);
 193 static void     lock_denied_free(nfs_resop4 *);
 194 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 195                     struct compound_state *);
 196 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 197                     struct compound_state *);
 198 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 199                     struct compound_state *);
 200 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 201                     struct compound_state *);
 202 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 203                     struct svc_req *req, struct compound_state *cs);
 204 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 205                     struct compound_state *);
 206 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 207                     struct compound_state *);
 208 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 209                     struct svc_req *, struct compound_state *);
 210 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 211                     struct svc_req *, struct compound_state *);
 212 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 213                     struct compound_state *);
 214 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 215                     struct compound_state *);
 216 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 217                     struct compound_state *);
 218 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 219                     struct compound_state *);
 220 static void     rfs4_op_read_free(nfs_resop4 *);
 221 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 222 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 223                     struct compound_state *);
 224 static void     rfs4_op_readlink_free(nfs_resop4 *);
 225 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 226                     struct svc_req *, struct compound_state *);
 227 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 228                     struct compound_state *);
 229 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 230                     struct compound_state *);
 231 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 232                     struct compound_state *);
 233 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 234                     struct compound_state *);
 235 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 236                     struct compound_state *);
 237 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 238                     struct compound_state *);
 239 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 240                     struct compound_state *);
 241 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 242                     struct compound_state *);
 243 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 244                     struct svc_req *, struct compound_state *);
 245 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 246                     struct svc_req *req, struct compound_state *);
 247 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 248                     struct compound_state *);
 249 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 250 
 251 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 252                     struct svc_req *);
 253 nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 254 void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 255 
 256 
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and released by nfs4_ntov_table_free()
 * (both declared below).  The per-field notes are inferred from the field
 * names and types only -- NOTE(review): confirm against the users of this
 * structure.
 */
struct nfs4_ntov_table {
        union nfs4_attr_u *na;          /* presumably array of attr values */
        uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably attr index map */
        int attrcnt;                    /* presumably entries in use */
        bool_t vfsstat;                 /* presumably "VFS stats needed" */
};
 266 
 267 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 268 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 269                     struct nfs4_svgetit_arg *sargp);
 270 
 271 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 272                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 273                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 274 
 275 static void     hanfsv4_failover(nfs4_srv_t *);
 276 
 277 fem_t           *deleg_rdops;
 278 fem_t           *deleg_wrops;
 279 
 280 /*
 281  * NFS4 op dispatch table
 282  */
 283 
/*
 * One entry of the NFSv4 operation dispatch table (rfsv4disptab): the
 * handler for an operation, the routine that releases whatever the handler
 * allocated for its result, and a flags word.
 */
struct rfsv4disp {
        void    (*dis_proc)();          /* proc to call */
        void    (*dis_resfree)();       /* frees space allocated by proc */
        int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
};
 289 
/*
 * Dispatch table indexed by NFSv4 operation number; the comment above each
 * entry gives the index the entry occupies.  Slots 0-2 do not correspond to
 * a real operation and are routed to rfs4_op_illegal.  Entries whose result
 * needs no cleanup use nullfree as the free routine.
 */
static struct rfsv4disp rfsv4disptab[] = {
        /*
         * NFS VERSION 4
         */

        /* RFS_NULL = 0 */
        {rfs4_op_illegal, nullfree, 0},

        /* UNUSED = 1 */
        {rfs4_op_illegal, nullfree, 0},

        /* UNUSED = 2 */
        {rfs4_op_illegal, nullfree, 0},

        /* OP_ACCESS = 3 */
        {rfs4_op_access, nullfree, RPC_IDEMPOTENT},

        /* OP_CLOSE = 4 */
        {rfs4_op_close, nullfree, 0},

        /* OP_COMMIT = 5 */
        {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

        /* OP_CREATE = 6 */
        {rfs4_op_create, nullfree, 0},

        /* OP_DELEGPURGE = 7 */
        {rfs4_op_delegpurge, nullfree, 0},

        /* OP_DELEGRETURN = 8 */
        {rfs4_op_delegreturn, nullfree, 0},

        /* OP_GETATTR = 9 */
        {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

        /* OP_GETFH = 10 */
        {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

        /* OP_LINK = 11 */
        {rfs4_op_link, nullfree, 0},

        /* OP_LOCK = 12 */
        {rfs4_op_lock, lock_denied_free, 0},

        /* OP_LOCKT = 13 */
        {rfs4_op_lockt, lock_denied_free, 0},

        /* OP_LOCKU = 14 */
        {rfs4_op_locku, nullfree, 0},

        /* OP_LOOKUP = 15 */
        {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

        /* OP_LOOKUPP = 16 */
        {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

        /* OP_NVERIFY = 17 */
        {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

        /* OP_OPEN = 18 */
        {rfs4_op_open, rfs4_free_reply, 0},

        /* OP_OPENATTR = 19 */
        {rfs4_op_openattr, nullfree, 0},

        /* OP_OPEN_CONFIRM = 20 */
        {rfs4_op_open_confirm, nullfree, 0},

        /* OP_OPEN_DOWNGRADE = 21 */
        {rfs4_op_open_downgrade, nullfree, 0},

        /* OP_PUTFH = 22 */
        {rfs4_op_putfh, nullfree, RPC_ALL},

        /* OP_PUTPUBFH = 23 */
        {rfs4_op_putpubfh, nullfree, RPC_ALL},

        /* OP_PUTROOTFH = 24 */
        {rfs4_op_putrootfh, nullfree, RPC_ALL},

        /* OP_READ = 25 */
        {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

        /* OP_READDIR = 26 */
        {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

        /* OP_READLINK = 27 */
        {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

        /* OP_REMOVE = 28 */
        {rfs4_op_remove, nullfree, 0},

        /* OP_RENAME = 29 */
        {rfs4_op_rename, nullfree, 0},

        /* OP_RENEW = 30 */
        {rfs4_op_renew, nullfree, 0},

        /* OP_RESTOREFH = 31 */
        {rfs4_op_restorefh, nullfree, RPC_ALL},

        /* OP_SAVEFH = 32 */
        {rfs4_op_savefh, nullfree, RPC_ALL},

        /* OP_SECINFO = 33 */
        {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

        /* OP_SETATTR = 34 */
        {rfs4_op_setattr, nullfree, 0},

        /* OP_SETCLIENTID = 35 */
        {rfs4_op_setclientid, nullfree, 0},

        /* OP_SETCLIENTID_CONFIRM = 36 */
        {rfs4_op_setclientid_confirm, nullfree, 0},

        /* OP_VERIFY = 37 */
        {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

        /* OP_WRITE = 38 */
        {rfs4_op_write, nullfree, 0},

        /* OP_RELEASE_LOCKOWNER = 39 */
        {rfs4_op_release_lockowner, nullfree, 0},
};
 415 
/* Number of entries in rfsv4disptab (valid indices are 0 .. cnt - 1). */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * One past the last valid rfsv4disptab index.  The DEBUG-only
 * rfs4_op_string[] array has one more entry than the dispatch table, and
 * that final entry ("rfs4_op_illegal") sits at this index.
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 419 
 420 #ifdef DEBUG
 421 
 422 int             rfs4_fillone_debug = 0;
 423 int             rfs4_no_stub_access = 1;
 424 int             rfs4_rddir_debug = 0;
 425 
 426 static char    *rfs4_op_string[] = {
 427         "rfs4_op_null",
 428         "rfs4_op_1 unused",
 429         "rfs4_op_2 unused",
 430         "rfs4_op_access",
 431         "rfs4_op_close",
 432         "rfs4_op_commit",
 433         "rfs4_op_create",
 434         "rfs4_op_delegpurge",
 435         "rfs4_op_delegreturn",
 436         "rfs4_op_getattr",
 437         "rfs4_op_getfh",
 438         "rfs4_op_link",
 439         "rfs4_op_lock",
 440         "rfs4_op_lockt",
 441         "rfs4_op_locku",
 442         "rfs4_op_lookup",
 443         "rfs4_op_lookupp",
 444         "rfs4_op_nverify",
 445         "rfs4_op_open",
 446         "rfs4_op_openattr",
 447         "rfs4_op_open_confirm",
 448         "rfs4_op_open_downgrade",
 449         "rfs4_op_putfh",
 450         "rfs4_op_putpubfh",
 451         "rfs4_op_putrootfh",
 452         "rfs4_op_read",
 453         "rfs4_op_readdir",
 454         "rfs4_op_readlink",
 455         "rfs4_op_remove",
 456         "rfs4_op_rename",
 457         "rfs4_op_renew",
 458         "rfs4_op_restorefh",
 459         "rfs4_op_savefh",
 460         "rfs4_op_secinfo",
 461         "rfs4_op_setattr",
 462         "rfs4_op_setclientid",
 463         "rfs4_op_setclient_confirm",
 464         "rfs4_op_verify",
 465         "rfs4_op_write",
 466         "rfs4_op_release_lockowner",
 467         "rfs4_op_illegal"
 468 };
 469 #endif
 470 
 471 void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 472 
 473 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 474 
 475 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 476 
/*
 * nextdp(dp): address of the dirent64 that follows dp in a buffer, found by
 * advancing dp by its own d_reclen bytes.  Any earlier definition of the
 * macro is discarded first so this one always wins.
 */
#ifdef  nextdp
#undef nextdp
#endif
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 481 
/*
 * Operation template handed to fem_create() in rfs4_srvrinit() to build
 * deleg_rdops.  Each pair names a vnode operation and the deleg_rd_*
 * routine to install for it; the NULL, NULL pair terminates the list.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
        VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
        NULL,                   NULL
};
/*
 * Operation template handed to fem_create() in rfs4_srvrinit() to build
 * deleg_wrops.  Same layout as nfs4_rd_deleg_tmpl, with an additional
 * VOPNAME_READ entry; the NULL, NULL pair terminates the list.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
        VOPNAME_READ,           { .femop_read = deleg_wr_read },
        VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
        NULL,                   NULL
};
 503 
 504 /* ARGSUSED */
 505 static void *
 506 rfs4_zone_init(zoneid_t zoneid)
 507 {
 508         nfs4_srv_t *nsrv4;
 509         timespec32_t verf;
 510 
 511         nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 512 
 513         /*
 514          * The following algorithm attempts to find a unique verifier
 515          * to be used as the write verifier returned from the server
 516          * to the client.  It is important that this verifier change
 517          * whenever the server reboots.  Of secondary importance, it
 518          * is important for the verifier to be unique between two
 519          * different servers.
 520          *
 521          * Thus, an attempt is made to use the system hostid and the
 522          * current time in seconds when the nfssrv kernel module is
 523          * loaded.  It is assumed that an NFS server will not be able
 524          * to boot and then to reboot in less than a second.  If the
 525          * hostid has not been set, then the current high resolution
 526          * time is used.  This will ensure different verifiers each
 527          * time the server reboots and minimize the chances that two
 528          * different servers will have the same verifier.
 529          * XXX - this is broken on LP64 kernels.
 530          */
 531         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 532         if (verf.tv_sec != 0) {
 533                 verf.tv_nsec = gethrestime_sec();
 534         } else {
 535                 timespec_t tverf;
 536 
 537                 gethrestime(&tverf);
 538                 verf.tv_sec = (time_t)tverf.tv_sec;
 539                 verf.tv_nsec = tverf.tv_nsec;
 540         }
 541         nsrv4->write4verf = *(uint64_t *)&verf;
 542 
 543         /* Used to manage create/destroy of server state */
 544         nsrv4->nfs4_server_state = NULL;
 545         nsrv4->nfs4_cur_servinst = NULL;
 546         nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 547         mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 548         mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 549         mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 550         rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 551 
 552         return (nsrv4);
 553 }
 554 
 555 /* ARGSUSED */
 556 static void
 557 rfs4_zone_fini(zoneid_t zoneid, void *data)
 558 {
 559         nfs4_srv_t *nsrv4 = data;
 560 
 561         mutex_destroy(&nsrv4->deleg_lock);
 562         mutex_destroy(&nsrv4->state_lock);
 563         mutex_destroy(&nsrv4->servinst_lock);
 564         rw_destroy(&nsrv4->deleg_policy_lock);
 565 
 566         kmem_free(nsrv4, sizeof (*nsrv4));
 567 }
 568 
 569 void
 570 rfs4_srvrinit(void)
 571 {
 572         extern void rfs4_attr_init();
 573 
 574         zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
 575 
 576         rfs4_attr_init();
 577 
 578 
 579         if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 580                 rfs4_disable_delegation();
 581         } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 582             &deleg_wrops) != 0) {
 583                 rfs4_disable_delegation();
 584                 fem_free(deleg_rdops);
 585         }
 586 
 587         nfs4_srv_caller_id = fs_new_caller_id();
 588         lockt_sysid = lm_alloc_sysidt();
 589         vsd_create(&nfs4_srv_vkey, NULL);
 590         rfs4_state_g_init();
 591 }
 592 
 593 void
 594 rfs4_srvrfini(void)
 595 {
 596         if (lockt_sysid != LM_NOSYSID) {
 597                 lm_free_sysidt(lockt_sysid);
 598                 lockt_sysid = LM_NOSYSID;
 599         }
 600 
 601         rfs4_state_g_fini();
 602 
 603         fem_free(deleg_rdops);
 604         fem_free(deleg_wrops);
 605 
 606         (void) zone_key_delete(rfs4_zone_key);
 607 }
 608 
 609 void
 610 rfs4_do_server_start(int server_upordown,
 611     int srv_delegation, int cluster_booted)
 612 {
 613         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
 614 
 615         /* Is this a warm start? */
 616         if (server_upordown == NFS_SERVER_QUIESCED) {
 617                 cmn_err(CE_NOTE, "nfs4_srv: "
 618                     "server was previously quiesced; "
 619                     "existing NFSv4 state will be re-used");
 620 
 621                 /*
 622                  * HA-NFSv4: this is also the signal
 623                  * that a Resource Group failover has
 624                  * occurred.
 625                  */
 626                 if (cluster_booted)
 627                         hanfsv4_failover(nsrv4);
 628         } else {
 629                 /* Cold start */
 630                 nsrv4->rfs4_start_time = 0;
 631                 rfs4_state_zone_init(nsrv4);
 632                 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 633                     nfs4_drc_hash);
 634 
 635                 /*
 636                  * The nfsd service was started with the -s option
 637                  * we need to pull in any state from the paths indicated.
 638                  */
 639                 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 640                         /* read in the stable storage state from these paths */
 641                         rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 642                             rfs4_dss_newpaths);
 643                 }
 644         }
 645 
 646         /* Check if delegation is to be enabled */
 647         if (srv_delegation != FALSE)
 648                 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 649 }
 650 
 651 void
 652 rfs4_init_compound_state(struct compound_state *cs)
 653 {
 654         bzero(cs, sizeof (*cs));
 655         cs->cont = TRUE;
 656         cs->access = CS_ACCESS_DENIED;
 657         cs->deleg = FALSE;
 658         cs->mandlock = FALSE;
 659         cs->fh.nfs_fh4_val = cs->fhbuf;
 660 }
 661 
 662 void
 663 rfs4_grace_start(rfs4_servinst_t *sip)
 664 {
 665         rw_enter(&sip->rwlock, RW_WRITER);
 666         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 667         sip->grace_period = rfs4_grace_period;
 668         rw_exit(&sip->rwlock);
 669 }
 670 
 671 /*
 672  * returns true if the instance's grace period has never been started
 673  */
 674 int
 675 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 676 {
 677         time_t start_time;
 678 
 679         rw_enter(&sip->rwlock, RW_READER);
 680         start_time = sip->start_time;
 681         rw_exit(&sip->rwlock);
 682 
 683         return (start_time == 0);
 684 }
 685 
 686 /*
 687  * Indicates if server instance is within the
 688  * grace period.
 689  */
 690 int
 691 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 692 {
 693         time_t grace_expiry;
 694 
 695         rw_enter(&sip->rwlock, RW_READER);
 696         grace_expiry = sip->start_time + sip->grace_period;
 697         rw_exit(&sip->rwlock);
 698 
 699         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 700 }
 701 
 702 int
 703 rfs4_clnt_in_grace(rfs4_client_t *cp)
 704 {
 705         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 706 
 707         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 708 }
 709 
 710 /*
 711  * reset all currently active grace periods
 712  */
 713 void
 714 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 715 {
 716         rfs4_servinst_t *sip;
 717 
 718         mutex_enter(&nsrv4->servinst_lock);
 719         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 720                 if (rfs4_servinst_in_grace(sip))
 721                         rfs4_grace_start(sip);
 722         mutex_exit(&nsrv4->servinst_lock);
 723 }
 724 
 725 /*
 726  * start any new instances' grace periods
 727  */
 728 void
 729 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 730 {
 731         rfs4_servinst_t *sip;
 732 
 733         mutex_enter(&nsrv4->servinst_lock);
 734         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 735                 if (rfs4_servinst_grace_new(sip))
 736                         rfs4_grace_start(sip);
 737         mutex_exit(&nsrv4->servinst_lock);
 738 }
 739 
 740 static rfs4_dss_path_t *
 741 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 742     char *path, unsigned index)
 743 {
 744         size_t len;
 745         rfs4_dss_path_t *dss_path;
 746 
 747         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 748 
 749         /*
 750          * Take a copy of the string, since the original may be overwritten.
 751          * Sadly, no strdup() in the kernel.
 752          */
 753         /* allow for NUL */
 754         len = strlen(path) + 1;
 755         dss_path->path = kmem_alloc(len, KM_SLEEP);
 756         (void) strlcpy(dss_path->path, path, len);
 757 
 758         /* associate with servinst */
 759         dss_path->sip = sip;
 760         dss_path->index = index;
 761 
 762         /*
 763          * Add to list of served paths.
 764          * No locking required, as we're only ever called at startup.
 765          */
 766         if (nsrv4->dss_pathlist == NULL) {
 767                 /* this is the first dss_path_t */
 768 
 769                 /* needed for insque/remque */
 770                 dss_path->next = dss_path->prev = dss_path;
 771 
 772                 nsrv4->dss_pathlist = dss_path;
 773         } else {
 774                 insque(dss_path, nsrv4->dss_pathlist);
 775         }
 776 
 777         return (dss_path);
 778 }
 779 
 780 /*
 781  * Create a new server instance, and make it the currently active instance.
 782  * Note that starting the grace period too early will reduce the clients'
 783  * recovery window.
 784  */
 785 void
 786 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 787     int dss_npaths, char **dss_paths)
 788 {
 789         unsigned i;
 790         rfs4_servinst_t *sip;
 791         rfs4_oldstate_t *oldstate;
 792 
 793         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 794         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 795 
 796         sip->start_time = (time_t)0;
 797         sip->grace_period = (time_t)0;
 798         sip->next = NULL;
 799         sip->prev = NULL;
 800 
 801         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 802         /*
 803          * This initial dummy entry is required to setup for insque/remque.
 804          * It must be skipped over whenever the list is traversed.
 805          */
 806         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 807         /* insque/remque require initial list entry to be self-terminated */
 808         oldstate->next = oldstate;
 809         oldstate->prev = oldstate;
 810         sip->oldstate = oldstate;
 811 
 812 
 813         sip->dss_npaths = dss_npaths;
 814         sip->dss_paths = kmem_alloc(dss_npaths *
 815             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 816 
 817         for (i = 0; i < dss_npaths; i++) {
 818                 /* CSTYLED */
 819                 sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 820         }
 821 
 822         mutex_enter(&nsrv4->servinst_lock);
 823         if (nsrv4->nfs4_cur_servinst != NULL) {
 824                 /* add to linked list */
 825                 sip->prev = nsrv4->nfs4_cur_servinst;
 826                 nsrv4->nfs4_cur_servinst->next = sip;
 827         }
 828         if (start_grace)
 829                 rfs4_grace_start(sip);
 830         /* make the new instance "current" */
 831         nsrv4->nfs4_cur_servinst = sip;
 832 
 833         mutex_exit(&nsrv4->servinst_lock);
 834 }
 835 
 836 /*
 837  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 838  * all instances directly.
 839  */
 840 void
 841 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 842 {
 843         rfs4_servinst_t *sip, *prev, *current;
 844 #ifdef DEBUG
 845         int n = 0;
 846 #endif
 847 
 848         mutex_enter(&nsrv4->servinst_lock);
 849         ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 850         current = nsrv4->nfs4_cur_servinst;
 851         nsrv4->nfs4_cur_servinst = NULL;
 852         for (sip = current; sip != NULL; sip = prev) {
 853                 prev = sip->prev;
 854                 rw_destroy(&sip->rwlock);
 855                 if (sip->oldstate)
 856                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 857                 if (sip->dss_paths)
 858                         kmem_free(sip->dss_paths,
 859                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 860                 kmem_free(sip, sizeof (rfs4_servinst_t));
 861 #ifdef DEBUG
 862                 n++;
 863 #endif
 864         }
 865         mutex_exit(&nsrv4->servinst_lock);
 866 }
 867 
/*
 * Record sip as the server instance of client cp.
 * Should be called with cp->rc_dbe held (its refcnt is asserted to be > 0).
 *
 * The instance stored is whatever the caller passes in sip; this function
 * does not itself read nsrv4->nfs4_cur_servinst.  The store is done under
 * nsrv4->servinst_lock.
 */
void
rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
    rfs4_servinst_t *sip)
{
        ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);

        /*
         * The lock ensures that if the current instance is in the process
         * of changing, we will see the new one.
         */
        mutex_enter(&nsrv4->servinst_lock);
        cp->rc_server_instance = sip;
        mutex_exit(&nsrv4->servinst_lock);
}
 886 
 887 rfs4_servinst_t *
 888 rfs4_servinst(rfs4_client_t *cp)
 889 {
 890         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 891 
 892         return (cp->rc_server_instance);
 893 }
 894 
/*
 * Free routine that deliberately does nothing; resop is ignored.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
 900 
 901 /*
 902  * This is a fall-through for invalid or not implemented (yet) ops
 903  */
 904 /* ARGSUSED */
 905 static void
 906 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 907     struct compound_state *cs)
 908 {
 909         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 910 }
 911 
 912 /*
 913  * Check if the security flavor, nfsnum, is in the flavor_list.
 914  */
 915 bool_t
 916 in_flavor_list(int nfsnum, int *flavor_list, int count)
 917 {
 918         int i;
 919 
 920         for (i = 0; i < count; i++) {
 921                 if (nfsnum == flavor_list[i])
 922                         return (TRUE);
 923         }
 924         return (FALSE);
 925 }
 926 
/*
 * Used by rfs4_op_secinfo to get the security information from the
 * export structure associated with the component.
 *
 * cs	compound state; cs->vp is the directory in which nm is looked up,
 *	cs->exi the export it belongs to.
 * nm	component name to look up (may be "..").
 * resp	on NFS4_OK, SECINFO4resok_len/SECINFO4resok_val are filled in with
 *	a kmem_alloc'd array of secinfo4 entries (plus a kmem_alloc'd OID
 *	buffer for each RPCSEC_GSS entry).
 *
 * Returns NFS4_OK, or an nfsstat4 produced by puterrno4().  All error
 * returns happen before anything is allocated into resp.  The hold on the
 * looked-up vnode is released on every return path after the lookup.
 */
/* ARGSUSED */
static nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
        int error, different_export = 0;
        vnode_t *dvp, *vp;
        struct exportinfo *exi = NULL;
        fid_t fid;
        uint_t count, i;
        secinfo4 *resok_val;
        struct secinfo *secp;
        seconfig_t *si;
        bool_t did_traverse = FALSE;
        /* walk is only an out-argument for nfs_vptoexi(); its value is unused */
        int dotdot, walk;
        /* ne is only referenced by the ASSERT on exported_lock below */
        nfs_export_t *ne = nfs_get_export();

        dvp = cs->vp;
        dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

        /*
         * If dotdotting, then need to check whether it's above the
         * root of a filesystem, or above an export point.
         */
        if (dotdot) {

                /*
                 * If dotdotting at the root of a filesystem, then
                 * need to traverse back to the mounted-on filesystem
                 * and do the dotdot lookup there.
                 */
                if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {

                        /*
                         * If at the system root, then can
                         * go up no further.
                         */
                        if (VN_CMP(dvp, ZONE_ROOTVP()))
                                return (puterrno4(ENOENT));

                        /*
                         * Traverse back to the mounted-on filesystem
                         *
                         * NOTE(review): dvp is never VN_RELE'd in this
                         * function; confirm whether untraverse() returns
                         * a held vnode that would need releasing.
                         */
                        dvp = untraverse(cs->vp);

                        /*
                         * Set the different_export flag so we remember
                         * to pick up a new exportinfo entry for
                         * this new filesystem.
                         */
                        different_export = 1;
                } else {

                        /*
                         * If dotdotting above an export point then set
                         * the different_export to get new export info.
                         */
                        different_export = nfs_exported(cs->exi, cs->vp);
                }
        }

        /*
         * Get the vnode for the component "nm".
         * From here on every return path does VN_RELE(vp).
         */
        error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
            NULL, NULL, NULL);
        if (error)
                return (puterrno4(error));

        /*
         * If the vnode is in a pseudo filesystem, or if the security flavor
         * used in the request is valid but not an explicitly shared flavor,
         * or the access bit indicates that this is a limited access,
         * check whether this vnode is visible.
         * nfs_visible() may also set different_export.
         */
        if (!different_export &&
            (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
            cs->access & CS_ACCESS_LIMITED)) {
                if (! nfs_visible(cs->exi, vp, &different_export)) {
                        VN_RELE(vp);
                        return (puterrno4(ENOENT));
                }
        }

        /*
         * If it's a mountpoint, then traverse it.
         */
        if (vn_ismntpt(vp)) {
                if ((error = traverse(&vp)) != 0) {
                        VN_RELE(vp);
                        return (puterrno4(error));
                }
                /* remember that we had to traverse mountpoint */
                did_traverse = TRUE;
                different_export = 1;
        } else if (vp->v_vfsp != dvp->v_vfsp) {
                /*
                 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
                 * then vp is probably an LOFS object.  We don't need the
                 * realvp, we just need to know that we might have crossed
                 * a server fs boundary and need to call checkexport4.
                 * (LOFS lookup hides server fs mountpoints, and actually calls
                 * traverse)
                 */
                different_export = 1;
        }

        /*
         * Get the export information for it.
         */
        if (different_export) {

                /*
                 * The fid is only consumed by checkexport4() below; in the
                 * dotdot case the call still serves as an error check.
                 */
                bzero(&fid, sizeof (fid));
                fid.fid_len = MAXFIDSZ;
                error = vop_fid_pseudo(vp, &fid);
                if (error) {
                        VN_RELE(vp);
                        return (puterrno4(error));
                }

                if (dotdot)
                        exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
                else
                        exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

                if (exi == NULL) {
                        if (did_traverse == TRUE) {
                                /*
                                 * If this vnode is a mounted-on vnode,
                                 * but the mounted-on file system is not
                                 * exported, send back the secinfo for
                                 * the exported node that the mounted-on
                                 * vnode lives in.
                                 */
                                exi = cs->exi;
                        } else {
                                VN_RELE(vp);
                                return (puterrno4(EACCES));
                        }
                }
        } else {
                exi = cs->exi;
        }
        ASSERT(exi != NULL);


        /*
         * Create the secinfo result based on the security information
         * from the exportinfo structure (exi).
         *
         * Return all flavors for a pseudo node.
         * For a real export node, return the flavor that the client
         * has access with.
         *
         * The arrays allocated below are released by
         * rfs4_op_secinfo_free().
         */
        ASSERT(RW_LOCK_HELD(&ne->exported_lock));
        if (PSEUDO(exi)) {
                count = exi->exi_export.ex_seccnt; /* total sec count */
                resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
                secp = exi->exi_export.ex_secinfo;

                for (i = 0; i < count; i++) {
                        si = &secp[i].s_secinfo;
                        resok_val[i].flavor = si->sc_rpcnum;
                        /* only RPCSEC_GSS entries carry flavor_info */
                        if (resok_val[i].flavor == RPCSEC_GSS) {
                                rpcsec_gss_info *info;

                                info = &resok_val[i].flavor_info;
                                info->qop = si->sc_qop;
                                info->service = (rpc_gss_svc_t)si->sc_service;

                                /* get oid opaque data */
                                info->oid.sec_oid4_len =
                                    si->sc_gss_mech_type->length;
                                info->oid.sec_oid4_val = kmem_alloc(
                                    si->sc_gss_mech_type->length, KM_SLEEP);
                                bcopy(
                                    si->sc_gss_mech_type->elements,
                                    info->oid.sec_oid4_val,
                                    info->oid.sec_oid4_len);
                        }
                }
                resp->SECINFO4resok_len = count;
                resp->SECINFO4resok_val = resok_val;
        } else {
                /* ret_cnt: number of permitted flavors; k: next output slot */
                int ret_cnt = 0, k = 0;
                int *flavor_list;

                count = exi->exi_export.ex_seccnt; /* total sec count */
                secp = exi->exi_export.ex_secinfo;

                /* scratch list, sized for the worst case of all flavors */
                flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
                /* find out which flavors to return */
                for (i = 0; i < count; i ++) {
                        int access, flavor, perm;

                        flavor = secp[i].s_secinfo.sc_nfsnum;
                        perm = secp[i].s_flags;

                        access = nfsauth4_secinfo_access(exi, cs->req,
                            flavor, perm, cs->basecr);

                        /* keep flavors neither denied nor wrong-sec */
                        if (! (access & NFSAUTH_DENIED) &&
                            ! (access & NFSAUTH_WRONGSEC)) {
                                flavor_list[ret_cnt] = flavor;
                                ret_cnt++;
                        }
                }

                /* Create the returning SECINFO value */
                resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);

                for (i = 0; i < count; i++) {
                        /*
                         * If the flavor is in the flavor list,
                         * fill in resok_val.
                         */
                        si = &secp[i].s_secinfo;
                        if (in_flavor_list(si->sc_nfsnum,
                            flavor_list, ret_cnt)) {
                                resok_val[k].flavor = si->sc_rpcnum;
                                if (resok_val[k].flavor == RPCSEC_GSS) {
                                        rpcsec_gss_info *info;

                                        info = &resok_val[k].flavor_info;
                                        info->qop = si->sc_qop;
                                        info->service = (rpc_gss_svc_t)
                                            si->sc_service;

                                        /* get oid opaque data */
                                        info->oid.sec_oid4_len =
                                            si->sc_gss_mech_type->length;
                                        info->oid.sec_oid4_val = kmem_alloc(
                                            si->sc_gss_mech_type->length,
                                            KM_SLEEP);
                                        bcopy(si->sc_gss_mech_type->elements,
                                            info->oid.sec_oid4_val,
                                            info->oid.sec_oid4_len);
                                }
                                k++;
                        }
                        /*
                         * Stop once ret_cnt slots are filled so that
                         * resok_val is never written past its end.
                         */
                        if (k >= ret_cnt)
                                break;
                }
                resp->SECINFO4resok_len = ret_cnt;
                resp->SECINFO4resok_val = resok_val;
                kmem_free(flavor_list, count * sizeof (int));
        }

        VN_RELE(vp);
        return (NFS4_OK);
}
1181 
1182 /*
1183  * SECINFO (Operation 33): Obtain required security information on
1184  * the component name in the format of (security-mechanism-oid, qop, service)
1185  * triplets.
1186  */
1187 /* ARGSUSED */
1188 static void
1189 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1190     struct compound_state *cs)
1191 {
1192         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1193         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1194         utf8string *utfnm = &args->name;
1195         uint_t len;
1196         char *nm;
1197         struct sockaddr *ca;
1198         char *name = NULL;
1199         nfsstat4 status = NFS4_OK;
1200 
1201         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1202             SECINFO4args *, args);
1203 
1204         /*
1205          * Current file handle (cfh) should have been set before getting
1206          * into this function. If not, return error.
1207          */
1208         if (cs->vp == NULL) {
1209                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1210                 goto out;
1211         }
1212 
1213         if (cs->vp->v_type != VDIR) {
1214                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1215                 goto out;
1216         }
1217 
1218         /*
1219          * Verify the component name. If failed, error out, but
1220          * do not error out if the component name is a "..".
1221          * SECINFO will return its parents secinfo data for SECINFO "..".
1222          */
1223         status = utf8_dir_verify(utfnm);
1224         if (status != NFS4_OK) {
1225                 if (utfnm->utf8string_len != 2 ||
1226                     utfnm->utf8string_val[0] != '.' ||
1227                     utfnm->utf8string_val[1] != '.') {
1228                         *cs->statusp = resp->status = status;
1229                         goto out;
1230                 }
1231         }
1232 
1233         nm = utf8_to_str(utfnm, &len, NULL);
1234         if (nm == NULL) {
1235                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1236                 goto out;
1237         }
1238 
1239         if (len > MAXNAMELEN) {
1240                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1241                 kmem_free(nm, len);
1242                 goto out;
1243         }
1244 
1245         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1246         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1247             MAXPATHLEN  + 1);
1248 
1249         if (name == NULL) {
1250                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1251                 kmem_free(nm, len);
1252                 goto out;
1253         }
1254 
1255 
1256         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1257 
1258         if (name != nm)
1259                 kmem_free(name, MAXPATHLEN + 1);
1260         kmem_free(nm, len);
1261 
1262 out:
1263         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1264             SECINFO4res *, resp);
1265 }
1266 
1267 /*
1268  * Free SECINFO result.
1269  */
1270 /* ARGSUSED */
1271 static void
1272 rfs4_op_secinfo_free(nfs_resop4 *resop)
1273 {
1274         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1275         int count, i;
1276         secinfo4 *resok_val;
1277 
1278         /* If this is not an Ok result, nothing to free. */
1279         if (resp->status != NFS4_OK) {
1280                 return;
1281         }
1282 
1283         count = resp->SECINFO4resok_len;
1284         resok_val = resp->SECINFO4resok_val;
1285 
1286         for (i = 0; i < count; i++) {
1287                 if (resok_val[i].flavor == RPCSEC_GSS) {
1288                         rpcsec_gss_info *info;
1289 
1290                         info = &resok_val[i].flavor_info;
1291                         kmem_free(info->oid.sec_oid4_val,
1292                             info->oid.sec_oid4_len);
1293                 }
1294         }
1295         kmem_free(resok_val, count * sizeof (secinfo4));
1296         resp->SECINFO4resok_len = 0;
1297         resp->SECINFO4resok_val = NULL;
1298 }
1299 
1300 /* ARGSUSED */
1301 static void
1302 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1303     struct compound_state *cs)
1304 {
1305         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1306         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1307         int error;
1308         vnode_t *vp;
1309         struct vattr va;
1310         int checkwriteperm;
1311         cred_t *cr = cs->cr;
1312         bslabel_t *clabel, *slabel;
1313         ts_label_t *tslabel;
1314         boolean_t admin_low_client;
1315 
1316         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1317             ACCESS4args *, args);
1318 
1319 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1320         if (cs->access == CS_ACCESS_DENIED) {
1321                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1322                 goto out;
1323         }
1324 #endif
1325         if (cs->vp == NULL) {
1326                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1327                 goto out;
1328         }
1329 
1330         ASSERT(cr != NULL);
1331 
1332         vp = cs->vp;
1333 
1334         /*
1335          * If the file system is exported read only, it is not appropriate
1336          * to check write permissions for regular files and directories.
1337          * Special files are interpreted by the client, so the underlying
1338          * permissions are sent back to the client for interpretation.
1339          */
1340         if (rdonly4(req, cs) &&
1341             (vp->v_type == VREG || vp->v_type == VDIR))
1342                 checkwriteperm = 0;
1343         else
1344                 checkwriteperm = 1;
1345 
1346         /*
1347          * XXX
1348          * We need the mode so that we can correctly determine access
1349          * permissions relative to a mandatory lock file.  Access to
1350          * mandatory lock files is denied on the server, so it might
1351          * as well be reflected to the server during the open.
1352          */
1353         va.va_mask = AT_MODE;
1354         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1355         if (error) {
1356                 *cs->statusp = resp->status = puterrno4(error);
1357                 goto out;
1358         }
1359         resp->access = 0;
1360         resp->supported = 0;
1361 
1362         if (is_system_labeled()) {
1363                 ASSERT(req->rq_label != NULL);
1364                 clabel = req->rq_label;
1365                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1366                     "got client label from request(1)",
1367                     struct svc_req *, req);
1368                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1369                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1370                                 *cs->statusp = resp->status = puterrno4(EACCES);
1371                                 goto out;
1372                         }
1373                         slabel = label2bslabel(tslabel);
1374                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1375                             char *, "got server label(1) for vp(2)",
1376                             bslabel_t *, slabel, vnode_t *, vp);
1377 
1378                         admin_low_client = B_FALSE;
1379                 } else
1380                         admin_low_client = B_TRUE;
1381         }
1382 
1383         if (args->access & ACCESS4_READ) {
1384                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1385                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1386                     (!is_system_labeled() || admin_low_client ||
1387                     bldominates(clabel, slabel)))
1388                         resp->access |= ACCESS4_READ;
1389                 resp->supported |= ACCESS4_READ;
1390         }
1391         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1392                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1393                 if (!error && (!is_system_labeled() || admin_low_client ||
1394                     bldominates(clabel, slabel)))
1395                         resp->access |= ACCESS4_LOOKUP;
1396                 resp->supported |= ACCESS4_LOOKUP;
1397         }
1398         if (checkwriteperm &&
1399             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1400                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1401                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1402                     (!is_system_labeled() || admin_low_client ||
1403                     blequal(clabel, slabel)))
1404                         resp->access |=
1405                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1406                 resp->supported |=
1407                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1408         }
1409 
1410         if (checkwriteperm &&
1411             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1412                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1413                 if (!error && (!is_system_labeled() || admin_low_client ||
1414                     blequal(clabel, slabel)))
1415                         resp->access |= ACCESS4_DELETE;
1416                 resp->supported |= ACCESS4_DELETE;
1417         }
1418         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1419                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1420                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1421                     (!is_system_labeled() || admin_low_client ||
1422                     bldominates(clabel, slabel)))
1423                         resp->access |= ACCESS4_EXECUTE;
1424                 resp->supported |= ACCESS4_EXECUTE;
1425         }
1426 
1427         if (is_system_labeled() && !admin_low_client)
1428                 label_rele(tslabel);
1429 
1430         *cs->statusp = resp->status = NFS4_OK;
1431 out:
1432         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1433             ACCESS4res *, resp);
1434 }
1435 
1436 /* ARGSUSED */
1437 static void
1438 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1439     struct compound_state *cs)
1440 {
1441         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1442         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1443         int error;
1444         vnode_t *vp = cs->vp;
1445         cred_t *cr = cs->cr;
1446         vattr_t va;
1447         nfs4_srv_t *nsrv4;
1448 
1449         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1450             COMMIT4args *, args);
1451 
1452         if (vp == NULL) {
1453                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1454                 goto out;
1455         }
1456         if (cs->access == CS_ACCESS_DENIED) {
1457                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1458                 goto out;
1459         }
1460 
1461         if (args->offset + args->count < args->offset) {
1462                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1463                 goto out;
1464         }
1465 
1466         va.va_mask = AT_UID;
1467         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1468 
1469         /*
1470          * If we can't get the attributes, then we can't do the
1471          * right access checking.  So, we'll fail the request.
1472          */
1473         if (error) {
1474                 *cs->statusp = resp->status = puterrno4(error);
1475                 goto out;
1476         }
1477         if (rdonly4(req, cs)) {
1478                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1479                 goto out;
1480         }
1481 
1482         if (vp->v_type != VREG) {
1483                 if (vp->v_type == VDIR)
1484                         resp->status = NFS4ERR_ISDIR;
1485                 else
1486                         resp->status = NFS4ERR_INVAL;
1487                 *cs->statusp = resp->status;
1488                 goto out;
1489         }
1490 
1491         if (crgetuid(cr) != va.va_uid &&
1492             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1493                 *cs->statusp = resp->status = puterrno4(error);
1494                 goto out;
1495         }
1496 
1497         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1498 
1499         if (error) {
1500                 *cs->statusp = resp->status = puterrno4(error);
1501                 goto out;
1502         }
1503 
1504         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1505         *cs->statusp = resp->status = NFS4_OK;
1506         resp->writeverf = nsrv4->write4verf;
1507 out:
1508         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1509             COMMIT4res *, resp);
1510 }
1511 
1512 /*
1513  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1514  * was completed. It does the nfsv4 create for special files.
1515  */
1516 /* ARGSUSED */
1517 static vnode_t *
1518 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1519     struct compound_state *cs, vattr_t *vap, char *nm)
1520 {
1521         int error;
1522         cred_t *cr = cs->cr;
1523         vnode_t *dvp = cs->vp;
1524         vnode_t *vp = NULL;
1525         int mode;
1526         enum vcexcl excl;
1527 
1528         switch (args->type) {
1529         case NF4CHR:
1530         case NF4BLK:
1531                 if (secpolicy_sys_devices(cr) != 0) {
1532                         *cs->statusp = resp->status = NFS4ERR_PERM;
1533                         return (NULL);
1534                 }
1535                 if (args->type == NF4CHR)
1536                         vap->va_type = VCHR;
1537                 else
1538                         vap->va_type = VBLK;
1539                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1540                     args->ftype4_u.devdata.specdata2);
1541                 vap->va_mask |= AT_RDEV;
1542                 break;
1543         case NF4SOCK:
1544                 vap->va_type = VSOCK;
1545                 break;
1546         case NF4FIFO:
1547                 vap->va_type = VFIFO;
1548                 break;
1549         default:
1550                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1551                 return (NULL);
1552         }
1553 
1554         /*
1555          * Must specify the mode.
1556          */
1557         if (!(vap->va_mask & AT_MODE)) {
1558                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1559                 return (NULL);
1560         }
1561 
1562         excl = EXCL;
1563 
1564         mode = 0;
1565 
1566         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1567         if (error) {
1568                 *cs->statusp = resp->status = puterrno4(error);
1569                 return (NULL);
1570         }
1571         return (vp);
1572 }
1573 
1574 /*
1575  * nfsv4 create is used to create non-regular files. For regular files,
1576  * use nfsv4 open.
1577  */
1578 /* ARGSUSED */
1579 static void
1580 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1581     struct compound_state *cs)
1582 {
1583         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1584         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1585         int error;
1586         struct vattr bva, iva, iva2, ava, *vap;
1587         cred_t *cr = cs->cr;
1588         vnode_t *dvp = cs->vp;
1589         vnode_t *vp = NULL;
1590         vnode_t *realvp;
1591         char *nm, *lnm;
1592         uint_t len, llen;
1593         int syncval = 0;
1594         struct nfs4_svgetit_arg sarg;
1595         struct nfs4_ntov_table ntov;
1596         struct statvfs64 sb;
1597         nfsstat4 status;
1598         struct sockaddr *ca;
1599         char *name = NULL;
1600         char *lname = NULL;
1601 
1602         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1603             CREATE4args *, args);
1604 
1605         resp->attrset = 0;
1606 
1607         if (dvp == NULL) {
1608                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1609                 goto out;
1610         }
1611 
1612         /*
1613          * If there is an unshared filesystem mounted on this vnode,
1614          * do not allow to create an object in this directory.
1615          */
1616         if (vn_ismntpt(dvp)) {
1617                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1618                 goto out;
1619         }
1620 
1621         /* Verify that type is correct */
1622         switch (args->type) {
1623         case NF4LNK:
1624         case NF4BLK:
1625         case NF4CHR:
1626         case NF4SOCK:
1627         case NF4FIFO:
1628         case NF4DIR:
1629                 break;
1630         default:
1631                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1632                 goto out;
1633         };
1634 
1635         if (cs->access == CS_ACCESS_DENIED) {
1636                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1637                 goto out;
1638         }
1639         if (dvp->v_type != VDIR) {
1640                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1641                 goto out;
1642         }
1643         status = utf8_dir_verify(&args->objname);
1644         if (status != NFS4_OK) {
1645                 *cs->statusp = resp->status = status;
1646                 goto out;
1647         }
1648 
1649         if (rdonly4(req, cs)) {
1650                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1651                 goto out;
1652         }
1653 
1654         /*
1655          * Name of newly created object
1656          */
1657         nm = utf8_to_fn(&args->objname, &len, NULL);
1658         if (nm == NULL) {
1659                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1660                 goto out;
1661         }
1662 
1663         if (len > MAXNAMELEN) {
1664                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1665                 kmem_free(nm, len);
1666                 goto out;
1667         }
1668 
1669         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1670         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1671             MAXPATHLEN  + 1);
1672 
1673         if (name == NULL) {
1674                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1675                 kmem_free(nm, len);
1676                 goto out;
1677         }
1678 
1679         resp->attrset = 0;
1680 
1681         sarg.sbp = &sb;
1682         sarg.is_referral = B_FALSE;
1683         nfs4_ntov_table_init(&ntov);
1684 
1685         status = do_rfs4_set_attrs(&resp->attrset,
1686             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1687 
1688         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1689                 status = NFS4ERR_INVAL;
1690 
1691         if (status != NFS4_OK) {
1692                 *cs->statusp = resp->status = status;
1693                 if (name != nm)
1694                         kmem_free(name, MAXPATHLEN + 1);
1695                 kmem_free(nm, len);
1696                 nfs4_ntov_table_free(&ntov, &sarg);
1697                 resp->attrset = 0;
1698                 goto out;
1699         }
1700 
1701         /* Get "before" change value */
1702         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1703         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1704         if (error) {
1705                 *cs->statusp = resp->status = puterrno4(error);
1706                 if (name != nm)
1707                         kmem_free(name, MAXPATHLEN + 1);
1708                 kmem_free(nm, len);
1709                 nfs4_ntov_table_free(&ntov, &sarg);
1710                 resp->attrset = 0;
1711                 goto out;
1712         }
1713         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1714 
1715         vap = sarg.vap;
1716 
1717         /*
1718          * Set the default initial values for attributes when the parent
1719          * directory does not have the VSUID/VSGID bit set and they have
1720          * not been specified in createattrs.
1721          */
1722         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1723                 vap->va_uid = crgetuid(cr);
1724                 vap->va_mask |= AT_UID;
1725         }
1726         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1727                 vap->va_gid = crgetgid(cr);
1728                 vap->va_mask |= AT_GID;
1729         }
1730 
1731         vap->va_mask |= AT_TYPE;
1732         switch (args->type) {
1733         case NF4DIR:
1734                 vap->va_type = VDIR;
1735                 if ((vap->va_mask & AT_MODE) == 0) {
1736                         vap->va_mode = 0700; /* default: owner rwx only */
1737                         vap->va_mask |= AT_MODE;
1738                 }
1739                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1740                 if (error)
1741                         break;
1742 
1743                 /*
1744                  * Get the initial "after" sequence number, if it fails,
1745                  * set to zero
1746                  */
1747                 iva.va_mask = AT_SEQ;
1748                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1749                         iva.va_seq = 0;
1750                 break;
1751         case NF4LNK:
1752                 vap->va_type = VLNK;
1753                 if ((vap->va_mask & AT_MODE) == 0) {
1754                         vap->va_mode = 0700; /* default: owner rwx only */
1755                         vap->va_mask |= AT_MODE;
1756                 }
1757 
1758                 /*
1759                  * symlink names must be treated as data
1760                  */
1761                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1762                     &llen, NULL);
1763 
1764                 if (lnm == NULL) {
1765                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1766                         if (name != nm)
1767                                 kmem_free(name, MAXPATHLEN + 1);
1768                         kmem_free(nm, len);
1769                         nfs4_ntov_table_free(&ntov, &sarg);
1770                         resp->attrset = 0;
1771                         goto out;
1772                 }
1773 
1774                 if (llen > MAXPATHLEN) {
1775                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1776                         if (name != nm)
1777                                 kmem_free(name, MAXPATHLEN + 1);
1778                         kmem_free(nm, len);
1779                         kmem_free(lnm, llen);
1780                         nfs4_ntov_table_free(&ntov, &sarg);
1781                         resp->attrset = 0;
1782                         goto out;
1783                 }
1784 
1785                 lname = nfscmd_convname(ca, cs->exi, lnm,
1786                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1787 
1788                 if (lname == NULL) {
1789                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1790                         if (name != nm)
1791                                 kmem_free(name, MAXPATHLEN + 1);
1792                         kmem_free(nm, len);
1793                         kmem_free(lnm, llen);
1794                         nfs4_ntov_table_free(&ntov, &sarg);
1795                         resp->attrset = 0;
1796                         goto out;
1797                 }
1798 
1799                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1800                 if (lname != lnm)
1801                         kmem_free(lname, MAXPATHLEN + 1);
1802                 kmem_free(lnm, llen);
1803                 if (error)
1804                         break;
1805 
1806                 /*
1807                  * Get the initial "after" sequence number, if it fails,
1808                  * set to zero
1809                  */
1810                 iva.va_mask = AT_SEQ;
1811                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1812                         iva.va_seq = 0;
1813 
1814                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1815                     NULL, NULL, NULL);
1816                 if (error)
1817                         break;
1818 
1819                 /*
1820                  * va_seq is not safe over VOP calls, check it again
1821                  * if it has changed zero out iva to force atomic = FALSE.
1822                  */
1823                 iva2.va_mask = AT_SEQ;
1824                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1825                     iva2.va_seq != iva.va_seq)
1826                         iva.va_seq = 0;
1827                 break;
1828         default:
1829                 /*
1830                  * probably a special file.
1831                  */
1832                 if ((vap->va_mask & AT_MODE) == 0) {
1833                         vap->va_mode = 0600; /* default: owner rw only */
1834                         vap->va_mask |= AT_MODE;
1835                 }
1836                 syncval = FNODSYNC;
1837                 /*
1838                  * We know this will only generate one VOP call
1839                  */
1840                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1841 
1842                 if (vp == NULL) {
1843                         if (name != nm)
1844                                 kmem_free(name, MAXPATHLEN + 1);
1845                         kmem_free(nm, len);
1846                         nfs4_ntov_table_free(&ntov, &sarg);
1847                         resp->attrset = 0;
1848                         goto out;
1849                 }
1850 
1851                 /*
1852                  * Get the initial "after" sequence number, if it fails,
1853                  * set to zero
1854                  */
1855                 iva.va_mask = AT_SEQ;
1856                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1857                         iva.va_seq = 0;
1858 
1859                 break;
1860         }
1861         if (name != nm)
1862                 kmem_free(name, MAXPATHLEN + 1);
1863         kmem_free(nm, len);
1864 
1865         if (error) {
1866                 *cs->statusp = resp->status = puterrno4(error);
1867         }
1868 
1869         /*
1870          * Force modified data and metadata out to stable storage.
1871          */
1872         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1873 
1874         if (resp->status != NFS4_OK) {
1875                 if (vp != NULL)
1876                         VN_RELE(vp);
1877                 nfs4_ntov_table_free(&ntov, &sarg);
1878                 resp->attrset = 0;
1879                 goto out;
1880         }
1881 
1882         /*
1883          * Finish setup of cinfo response, "before" value already set.
1884          * Get "after" change value, if it fails, simply return the
1885          * before value.
1886          */
1887         ava.va_mask = AT_CTIME|AT_SEQ;
1888         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1889                 ava.va_ctime = bva.va_ctime;
1890                 ava.va_seq = 0;
1891         }
1892         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1893 
1894         /*
1895          * True verification that object was created with correct
1896          * attrs is impossible.  The attrs could have been changed
1897          * immediately after object creation.  If attributes did
1898          * not verify, the only recourse for the server is to
1899          * destroy the object.  Maybe if some attrs (like gid)
1900          * are set incorrectly, the object should be destroyed;
1901          * however, seems bad as a default policy.  Do we really
1902          * want to destroy an object over one of the times not
1903          * verifying correctly?  For these reasons, the server
1904          * currently sets bits in attrset for createattrs
1905          * that were set; however, no verification is done.
1906          *
1907          * vmask_to_nmask accounts for vattr bits set on create
1908          *      [do_rfs4_set_attrs() only sets resp bits for
1909          *       non-vattr/vfs bits.]
1910          * Mask off any bits set by default so as not to return
1911          * more attrset bits than were requested in createattrs
1912          */
1913         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1914         resp->attrset &= args->createattrs.attrmask;
1915         nfs4_ntov_table_free(&ntov, &sarg);
1916 
1917         error = makefh4(&cs->fh, vp, cs->exi);
1918         if (error) {
1919                 *cs->statusp = resp->status = puterrno4(error);
1920         }
1921 
1922         /*
1923          * The cinfo.atomic = TRUE only if we got no errors, we have
1924          * non-zero va_seq's, and it has incremented by exactly one
1925          * during the creation and it didn't change during the VOP_LOOKUP
1926          * or VOP_FSYNC.
1927          */
1928         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1929             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1930                 resp->cinfo.atomic = TRUE;
1931         else
1932                 resp->cinfo.atomic = FALSE;
1933 
1934         /*
1935          * Force modified metadata out to stable storage.
1936          *
1937          * if a underlying vp exists, pass it to VOP_FSYNC
1938          */
1939         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1940                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1941         else
1942                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1943 
1944         if (resp->status != NFS4_OK) {
1945                 VN_RELE(vp);
1946                 goto out;
1947         }
1948         if (cs->vp)
1949                 VN_RELE(cs->vp);
1950 
1951         cs->vp = vp;
1952         *cs->statusp = resp->status = NFS4_OK;
1953 out:
1954         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1955             CREATE4res *, resp);
1956 }
1957 
1958 /*ARGSUSED*/
1959 static void
1960 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1961     struct compound_state *cs)
1962 {
1963         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1964             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1965 
1966         rfs4_op_inval(argop, resop, req, cs);
1967 
1968         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1969             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1970 }
1971 
1972 /*ARGSUSED*/
1973 static void
1974 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1975     struct compound_state *cs)
1976 {
1977         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1978         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1979         rfs4_deleg_state_t *dsp;
1980         nfsstat4 status;
1981 
1982         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1983             DELEGRETURN4args *, args);
1984 
1985         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1986         resp->status = *cs->statusp = status;
1987         if (status != NFS4_OK)
1988                 goto out;
1989 
1990         /* Ensure specified filehandle matches */
1991         if (cs->vp != dsp->rds_finfo->rf_vp) {
1992                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1993         } else
1994                 rfs4_return_deleg(dsp, FALSE);
1995 
1996         rfs4_update_lease(dsp->rds_client);
1997 
1998         rfs4_deleg_state_rele(dsp);
1999 out:
2000         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2001             DELEGRETURN4res *, resp);
2002 }
2003 
2004 /*
2005  * Check to see if a given "flavor" is an explicitly shared flavor.
2006  * The assumption of this routine is the "flavor" is already a valid
2007  * flavor in the secinfo list of "exi".
2008  *
2009  *      e.g.
2010  *              # share -o sec=flavor1 /export
2011  *              # share -o sec=flavor2 /export/home
2012  *
2013  *              flavor2 is not an explicitly shared flavor for /export,
2014  *              however it is in the secinfo list for /export thru the
2015  *              server namespace setup.
2016  */
2017 int
2018 is_exported_sec(int flavor, struct exportinfo *exi)
2019 {
2020         int     i;
2021         struct secinfo *sp;
2022 
2023         sp = exi->exi_export.ex_secinfo;
2024         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2025                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2026                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2027                         return (SEC_REF_EXPORTED(&sp[i]));
2028                 }
2029         }
2030 
2031         /* Should not reach this point based on the assumption */
2032         return (0);
2033 }
2034 
2035 /*
2036  * Check if the security flavor used in the request matches what is
2037  * required at the export point or at the root pseudo node (exi_root).
2038  *
2039  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2040  *
2041  */
2042 static int
2043 secinfo_match_or_authnone(struct compound_state *cs)
2044 {
2045         int     i;
2046         struct secinfo *sp;
2047 
2048         /*
2049          * Check cs->nfsflavor (from the request) against
2050          * the current export data in cs->exi.
2051          */
2052         sp = cs->exi->exi_export.ex_secinfo;
2053         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2054                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2055                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2056                         return (1);
2057         }
2058 
2059         return (0);
2060 }
2061 
2062 /*
2063  * Check the access authority for the client and return the correct error.
2064  */
2065 nfsstat4
2066 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2067 {
2068         int     authres;
2069 
2070         /*
2071          * First, check if the security flavor used in the request
2072          * are among the flavors set in the server namespace.
2073          */
2074         if (!secinfo_match_or_authnone(cs)) {
2075                 *cs->statusp = NFS4ERR_WRONGSEC;
2076                 return (*cs->statusp);
2077         }
2078 
2079         authres = checkauth4(cs, req);
2080 
2081         if (authres > 0) {
2082                 *cs->statusp = NFS4_OK;
2083                 if (! (cs->access & CS_ACCESS_LIMITED))
2084                         cs->access = CS_ACCESS_OK;
2085         } else if (authres == 0) {
2086                 *cs->statusp = NFS4ERR_ACCESS;
2087         } else if (authres == -2) {
2088                 *cs->statusp = NFS4ERR_WRONGSEC;
2089         } else {
2090                 *cs->statusp = NFS4ERR_DELAY;
2091         }
2092         return (*cs->statusp);
2093 }
2094 
2095 /*
2096  * bitmap4_to_attrmask is called by getattr and readdir.
2097  * It sets up the vattr mask and determines whether vfsstat call is needed
2098  * based on the input bitmap.
2099  * Returns nfsv4 status.
2100  */
2101 static nfsstat4
2102 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2103 {
2104         int i;
2105         uint_t  va_mask;
2106         struct statvfs64 *sbp = sargp->sbp;
2107 
2108         sargp->sbp = NULL;
2109         sargp->flag = 0;
2110         sargp->rdattr_error = NFS4_OK;
2111         sargp->mntdfid_set = FALSE;
2112         if (sargp->cs->vp)
2113                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2114                     FH4_ATTRDIR | FH4_NAMEDATTR);
2115         else
2116                 sargp->xattr = 0;
2117 
2118         /*
2119          * Set rdattr_error_req to true if return error per
2120          * failed entry rather than fail the readdir.
2121          */
2122         if (breq & FATTR4_RDATTR_ERROR_MASK)
2123                 sargp->rdattr_error_req = 1;
2124         else
2125                 sargp->rdattr_error_req = 0;
2126 
2127         /*
2128          * generate the va_mask
2129          * Handle the easy cases first
2130          */
2131         switch (breq) {
2132         case NFS4_NTOV_ATTR_MASK:
2133                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2134                 return (NFS4_OK);
2135 
2136         case NFS4_FS_ATTR_MASK:
2137                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2138                 sargp->sbp = sbp;
2139                 return (NFS4_OK);
2140 
2141         case NFS4_NTOV_ATTR_CACHE_MASK:
2142                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2143                 return (NFS4_OK);
2144 
2145         case FATTR4_LEASE_TIME_MASK:
2146                 sargp->vap->va_mask = 0;
2147                 return (NFS4_OK);
2148 
2149         default:
2150                 va_mask = 0;
2151                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2152                         if ((breq & nfs4_ntov_map[i].fbit) &&
2153                             nfs4_ntov_map[i].vbit)
2154                                 va_mask |= nfs4_ntov_map[i].vbit;
2155                 }
2156 
2157                 /*
2158                  * Check is vfsstat is needed
2159                  */
2160                 if (breq & NFS4_FS_ATTR_MASK)
2161                         sargp->sbp = sbp;
2162 
2163                 sargp->vap->va_mask = va_mask;
2164                 return (NFS4_OK);
2165         }
2166         /* NOTREACHED */
2167 }
2168 
2169 /*
2170  * bitmap4_get_sysattrs is called by getattr and readdir.
2171  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2172  * Returns nfsv4 status.
2173  */
2174 static nfsstat4
2175 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2176 {
2177         int error;
2178         struct compound_state *cs = sargp->cs;
2179         vnode_t *vp = cs->vp;
2180 
2181         if (sargp->sbp != NULL) {
2182                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2183                         sargp->sbp = NULL;   /* to identify error */
2184                         return (puterrno4(error));
2185                 }
2186         }
2187 
2188         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2189 }
2190 
2191 static void
2192 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2193 {
2194         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2195             KM_SLEEP);
2196         ntovp->attrcnt = 0;
2197         ntovp->vfsstat = FALSE;
2198 }
2199 
2200 static void
2201 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2202     struct nfs4_svgetit_arg *sargp)
2203 {
2204         int i;
2205         union nfs4_attr_u *na;
2206         uint8_t *amap;
2207 
2208         /*
2209          * XXX Should do the same checks for whether the bit is set
2210          */
2211         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2212             i < ntovp->attrcnt; i++, na++, amap++) {
2213                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2214                     NFS4ATTR_FREEIT, sargp, na);
2215         }
2216         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2217                 /*
2218                  * xdr_free for getattr will be done later
2219                  */
2220                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2221                     i < ntovp->attrcnt; i++, na++, amap++) {
2222                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2223                 }
2224         }
2225         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2226 }
2227 
2228 /*
2229  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2230  */
2231 static nfsstat4
2232 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2233     struct nfs4_svgetit_arg *sargp)
2234 {
2235         int error = 0;
2236         int i, k;
2237         struct nfs4_ntov_table ntov;
2238         XDR xdr;
2239         ulong_t xdr_size;
2240         char *xdr_attrs;
2241         nfsstat4 status = NFS4_OK;
2242         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2243         union nfs4_attr_u *na;
2244         uint8_t *amap;
2245 
2246         sargp->op = NFS4ATTR_GETIT;
2247         sargp->flag = 0;
2248 
2249         fattrp->attrmask = 0;
2250         /* if no bits requested, then return empty fattr4 */
2251         if (breq == 0) {
2252                 fattrp->attrlist4_len = 0;
2253                 fattrp->attrlist4 = NULL;
2254                 return (NFS4_OK);
2255         }
2256 
2257         /*
2258          * return NFS4ERR_INVAL when client requests write-only attrs
2259          */
2260         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2261                 return (NFS4ERR_INVAL);
2262 
2263         nfs4_ntov_table_init(&ntov);
2264         na = ntov.na;
2265         amap = ntov.amap;
2266 
2267         /*
2268          * Now loop to get or verify the attrs
2269          */
2270         for (i = 0; i < nfs4_ntov_map_size; i++) {
2271                 if (breq & nfs4_ntov_map[i].fbit) {
2272                         if ((*nfs4_ntov_map[i].sv_getit)(
2273                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2274 
2275                                 error = (*nfs4_ntov_map[i].sv_getit)(
2276                                     NFS4ATTR_GETIT, sargp, na);
2277 
2278                                 /*
2279                                  * Possible error values:
2280                                  * >0 if sv_getit failed to
2281                                  * get the attr; 0 if succeeded;
2282                                  * <0 if rdattr_error and the
2283                                  * attribute cannot be returned.
2284                                  */
2285                                 if (error && !(sargp->rdattr_error_req))
2286                                         goto done;
2287                                 /*
2288                                  * If error then just for entry
2289                                  */
2290                                 if (error == 0) {
2291                                         fattrp->attrmask |=
2292                                             nfs4_ntov_map[i].fbit;
2293                                         *amap++ =
2294                                             (uint8_t)nfs4_ntov_map[i].nval;
2295                                         na++;
2296                                         (ntov.attrcnt)++;
2297                                 } else if ((error > 0) &&
2298                                     (sargp->rdattr_error == NFS4_OK)) {
2299                                         sargp->rdattr_error = puterrno4(error);
2300                                 }
2301                                 error = 0;
2302                         }
2303                 }
2304         }
2305 
2306         /*
2307          * If rdattr_error was set after the return value for it was assigned,
2308          * update it.
2309          */
2310         if (prev_rdattr_error != sargp->rdattr_error) {
2311                 na = ntov.na;
2312                 amap = ntov.amap;
2313                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2314                         k = *amap;
2315                         if (k < FATTR4_RDATTR_ERROR) {
2316                                 continue;
2317                         }
2318                         if ((k == FATTR4_RDATTR_ERROR) &&
2319                             ((*nfs4_ntov_map[k].sv_getit)(
2320                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2321 
2322                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2323                                     NFS4ATTR_GETIT, sargp, na);
2324                         }
2325                         break;
2326                 }
2327         }
2328 
2329         xdr_size = 0;
2330         na = ntov.na;
2331         amap = ntov.amap;
2332         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2333                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2334         }
2335 
2336         fattrp->attrlist4_len = xdr_size;
2337         if (xdr_size) {
2338                 /* freed by rfs4_op_getattr_free() */
2339                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2340 
2341                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2342 
2343                 na = ntov.na;
2344                 amap = ntov.amap;
2345                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2346                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2347                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2348                                     int, *amap);
2349                                 status = NFS4ERR_SERVERFAULT;
2350                                 break;
2351                         }
2352                 }
2353                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2354         } else {
2355                 fattrp->attrlist4 = NULL;
2356         }
2357 done:
2358 
2359         nfs4_ntov_table_free(&ntov, sargp);
2360 
2361         if (error != 0)
2362                 status = puterrno4(error);
2363 
2364         return (status);
2365 }
2366 
2367 /* ARGSUSED */
2368 static void
2369 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2370     struct compound_state *cs)
2371 {
2372         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2373         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2374         struct nfs4_svgetit_arg sarg;
2375         struct statvfs64 sb;
2376         nfsstat4 status;
2377 
2378         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2379             GETATTR4args *, args);
2380 
2381         if (cs->vp == NULL) {
2382                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2383                 goto out;
2384         }
2385 
2386         if (cs->access == CS_ACCESS_DENIED) {
2387                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2388                 goto out;
2389         }
2390 
2391         sarg.sbp = &sb;
2392         sarg.cs = cs;
2393         sarg.is_referral = B_FALSE;
2394 
2395         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2396         if (status == NFS4_OK) {
2397 
2398                 status = bitmap4_get_sysattrs(&sarg);
2399                 if (status == NFS4_OK) {
2400 
2401                         /* Is this a referral? */
2402                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2403                                 /* Older V4 Solaris client sees a link */
2404                                 if (client_is_downrev(req))
2405                                         sarg.vap->va_type = VLNK;
2406                                 else
2407                                         sarg.is_referral = B_TRUE;
2408                         }
2409 
2410                         status = do_rfs4_op_getattr(args->attr_request,
2411                             &resp->obj_attributes, &sarg);
2412                 }
2413         }
2414         *cs->statusp = resp->status = status;
2415 out:
2416         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2417             GETATTR4res *, resp);
2418 }
2419 
2420 static void
2421 rfs4_op_getattr_free(nfs_resop4 *resop)
2422 {
2423         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2424 
2425         nfs4_fattr4_free(&resp->obj_attributes);
2426 }
2427 
2428 /* ARGSUSED */
2429 static void
2430 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2431     struct compound_state *cs)
2432 {
2433         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2434 
2435         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2436 
2437         if (cs->vp == NULL) {
2438                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2439                 goto out;
2440         }
2441         if (cs->access == CS_ACCESS_DENIED) {
2442                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2443                 goto out;
2444         }
2445 
2446         /* check for reparse point at the share point */
2447         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2448                 /* it's all bad */
2449                 cs->exi->exi_moved = 1;
2450                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2451                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2452                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2453                 return;
2454         }
2455 
2456         /* check for reparse point at vp */
2457         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2458                 /* it's not all bad */
2459                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2460                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2461                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2462                 return;
2463         }
2464 
2465         resp->object.nfs_fh4_val =
2466             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2467         nfs_fh4_copy(&cs->fh, &resp->object);
2468         *cs->statusp = resp->status = NFS4_OK;
2469 out:
2470         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2471             GETFH4res *, resp);
2472 }
2473 
2474 static void
2475 rfs4_op_getfh_free(nfs_resop4 *resop)
2476 {
2477         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2478 
2479         if (resp->status == NFS4_OK &&
2480             resp->object.nfs_fh4_val != NULL) {
2481                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2482                 resp->object.nfs_fh4_val = NULL;
2483                 resp->object.nfs_fh4_len = 0;
2484         }
2485 }
2486 
2487 /*
2488  * illegal: args: void
2489  *          res : status (NFS4ERR_OP_ILLEGAL)
2490  */
2491 /* ARGSUSED */
2492 static void
2493 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2494     struct svc_req *req, struct compound_state *cs)
2495 {
2496         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2497 
2498         resop->resop = OP_ILLEGAL;
2499         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2500 }
2501 
2502 /*
2503  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2504  *       res: status. If success - CURRENT_FH unchanged, return change_info
2505  */
2506 /* ARGSUSED */
2507 static void
2508 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2509     struct compound_state *cs)
2510 {
2511         LINK4args *args = &argop->nfs_argop4_u.oplink;
2512         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2513         int error;
2514         vnode_t *vp;
2515         vnode_t *dvp;
2516         struct vattr bdva, idva, adva;
2517         char *nm;
2518         uint_t  len;
2519         struct sockaddr *ca;
2520         char *name = NULL;
2521         nfsstat4 status;
2522 
2523         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2524             LINK4args *, args);
2525 
2526         /* SAVED_FH: source object */
2527         vp = cs->saved_vp;
2528         if (vp == NULL) {
2529                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2530                 goto out;
2531         }
2532 
2533         /* CURRENT_FH: target directory */
2534         dvp = cs->vp;
2535         if (dvp == NULL) {
2536                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2537                 goto out;
2538         }
2539 
2540         /*
2541          * If there is a non-shared filesystem mounted on this vnode,
2542          * do not allow to link any file in this directory.
2543          */
2544         if (vn_ismntpt(dvp)) {
2545                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2546                 goto out;
2547         }
2548 
2549         if (cs->access == CS_ACCESS_DENIED) {
2550                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2551                 goto out;
2552         }
2553 
2554         /* Check source object's type validity */
2555         if (vp->v_type == VDIR) {
2556                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2557                 goto out;
2558         }
2559 
2560         /* Check target directory's type */
2561         if (dvp->v_type != VDIR) {
2562                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2563                 goto out;
2564         }
2565 
2566         if (cs->saved_exi != cs->exi) {
2567                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2568                 goto out;
2569         }
2570 
2571         status = utf8_dir_verify(&args->newname);
2572         if (status != NFS4_OK) {
2573                 *cs->statusp = resp->status = status;
2574                 goto out;
2575         }
2576 
2577         nm = utf8_to_fn(&args->newname, &len, NULL);
2578         if (nm == NULL) {
2579                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2580                 goto out;
2581         }
2582 
2583         if (len > MAXNAMELEN) {
2584                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2585                 kmem_free(nm, len);
2586                 goto out;
2587         }
2588 
2589         if (rdonly4(req, cs)) {
2590                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2591                 kmem_free(nm, len);
2592                 goto out;
2593         }
2594 
2595         /* Get "before" change value */
2596         bdva.va_mask = AT_CTIME|AT_SEQ;
2597         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2598         if (error) {
2599                 *cs->statusp = resp->status = puterrno4(error);
2600                 kmem_free(nm, len);
2601                 goto out;
2602         }
2603 
2604         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2605         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2606             MAXPATHLEN  + 1);
2607 
2608         if (name == NULL) {
2609                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2610                 kmem_free(nm, len);
2611                 goto out;
2612         }
2613 
2614         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2615 
2616         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2617 
2618         if (nm != name)
2619                 kmem_free(name, MAXPATHLEN + 1);
2620         kmem_free(nm, len);
2621 
2622         /*
2623          * Get the initial "after" sequence number, if it fails, set to zero
2624          */
2625         idva.va_mask = AT_SEQ;
2626         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2627                 idva.va_seq = 0;
2628 
2629         /*
2630          * Force modified data and metadata out to stable storage.
2631          */
2632         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2633         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2634 
2635         if (error) {
2636                 *cs->statusp = resp->status = puterrno4(error);
2637                 goto out;
2638         }
2639 
2640         /*
2641          * Get "after" change value, if it fails, simply return the
2642          * before value.
2643          */
2644         adva.va_mask = AT_CTIME|AT_SEQ;
2645         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2646                 adva.va_ctime = bdva.va_ctime;
2647                 adva.va_seq = 0;
2648         }
2649 
2650         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2651 
2652         /*
2653          * The cinfo.atomic = TRUE only if we have
2654          * non-zero va_seq's, and it has incremented by exactly one
2655          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2656          */
2657         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2658             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2659                 resp->cinfo.atomic = TRUE;
2660         else
2661                 resp->cinfo.atomic = FALSE;
2662 
2663         *cs->statusp = resp->status = NFS4_OK;
2664 out:
2665         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2666             LINK4res *, resp);
2667 }
2668 
2669 /*
2670  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2671  */
2672 
2673 /* ARGSUSED */
2674 static nfsstat4
2675 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2676 {
2677         int error;
2678         int different_export = 0;
2679         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2680         struct exportinfo *exi = NULL, *pre_exi = NULL;
2681         nfsstat4 stat;
2682         fid_t fid;
2683         int attrdir, dotdot, walk;
2684         bool_t is_newvp = FALSE;
2685 
2686         if (cs->vp->v_flag & V_XATTRDIR) {
2687                 attrdir = 1;
2688                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2689         } else {
2690                 attrdir = 0;
2691                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2692         }
2693 
2694         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2695 
2696         /*
2697          * If dotdotting, then need to check whether it's
2698          * above the root of a filesystem, or above an
2699          * export point.
2700          */
2701         if (dotdot) {
2702 
2703                 /*
2704                  * If dotdotting at the root of a filesystem, then
2705                  * need to traverse back to the mounted-on filesystem
2706                  * and do the dotdot lookup there.
2707                  */
2708                 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2709 
2710                         /*
2711                          * If at the system root, then can
2712                          * go up no further.
2713                          */
2714                         if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2715                                 return (puterrno4(ENOENT));
2716 
2717                         /*
2718                          * Traverse back to the mounted-on filesystem
2719                          */
2720                         cs->vp = untraverse(cs->vp);
2721 
2722                         /*
2723                          * Set the different_export flag so we remember
2724                          * to pick up a new exportinfo entry for
2725                          * this new filesystem.
2726                          */
2727                         different_export = 1;
2728                 } else {
2729 
2730                         /*
2731                          * If dotdotting above an export point then set
2732                          * the different_export to get new export info.
2733                          */
2734                         different_export = nfs_exported(cs->exi, cs->vp);
2735                 }
2736         }
2737 
2738         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2739             NULL, NULL, NULL);
2740         if (error)
2741                 return (puterrno4(error));
2742 
2743         /*
2744          * If the vnode is in a pseudo filesystem, check whether it is visible.
2745          *
2746          * XXX if the vnode is a symlink and it is not visible in
2747          * a pseudo filesystem, return ENOENT (not following symlink).
2748          * V4 client can not mount such symlink. This is a regression
2749          * from V2/V3.
2750          *
2751          * In the same exported filesystem, if the security flavor used
2752          * is not an explicitly shared flavor, limit the view to the visible
2753          * list entries only. This is not a WRONGSEC case because it's already
2754          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2755          */
2756         if (!different_export &&
2757             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2758             cs->access & CS_ACCESS_LIMITED)) {
2759                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2760                         VN_RELE(vp);
2761                         return (puterrno4(ENOENT));
2762                 }
2763         }
2764 
2765         /*
2766          * If it's a mountpoint, then traverse it.
2767          */
2768         if (vn_ismntpt(vp)) {
2769                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2770                 pre_tvp = vp;           /* save pre-traversed vnode     */
2771 
2772                 /*
2773                  * hold pre_tvp to counteract rele by traverse.  We will
2774                  * need pre_tvp below if checkexport4 fails
2775                  */
2776                 VN_HOLD(pre_tvp);
2777                 if ((error = traverse(&vp)) != 0) {
2778                         VN_RELE(vp);
2779                         VN_RELE(pre_tvp);
2780                         return (puterrno4(error));
2781                 }
2782                 different_export = 1;
2783         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2784                 /*
2785                  * The vfsp comparison is to handle the case where
2786                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2787                  * and NFS is unaware of local fs transistions because
2788                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2789                  * the dir and the obj returned by lookup will have different
2790                  * vfs ptrs.
2791                  */
2792                 different_export = 1;
2793         }
2794 
2795         if (different_export) {
2796 
2797                 bzero(&fid, sizeof (fid));
2798                 fid.fid_len = MAXFIDSZ;
2799                 error = vop_fid_pseudo(vp, &fid);
2800                 if (error) {
2801                         VN_RELE(vp);
2802                         if (pre_tvp)
2803                                 VN_RELE(pre_tvp);
2804                         return (puterrno4(error));
2805                 }
2806 
2807                 if (dotdot)
2808                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2809                 else
2810                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2811 
2812                 if (exi == NULL) {
2813                         if (pre_tvp) {
2814                                 /*
2815                                  * If this vnode is a mounted-on vnode,
2816                                  * but the mounted-on file system is not
2817                                  * exported, send back the filehandle for
2818                                  * the mounted-on vnode, not the root of
2819                                  * the mounted-on file system.
2820                                  */
2821                                 VN_RELE(vp);
2822                                 vp = pre_tvp;
2823                                 exi = pre_exi;
2824                         } else {
2825                                 VN_RELE(vp);
2826                                 return (puterrno4(EACCES));
2827                         }
2828                 } else if (pre_tvp) {
2829                         /* we're done with pre_tvp now. release extra hold */
2830                         VN_RELE(pre_tvp);
2831                 }
2832 
2833                 cs->exi = exi;
2834 
2835                 /*
2836                  * Now we do a checkauth4. The reason is that
2837                  * this client/user may not have access to the new
2838                  * exported file system, and if they do,
2839                  * the client/user may be mapped to a different uid.
2840                  *
2841                  * We start with a new cr, because the checkauth4 done
2842                  * in the PUT*FH operation over wrote the cred's uid,
2843                  * gid, etc, and we want the real thing before calling
2844                  * checkauth4()
2845                  */
2846                 crfree(cs->cr);
2847                 cs->cr = crdup(cs->basecr);
2848 
2849                 oldvp = cs->vp;
2850                 cs->vp = vp;
2851                 is_newvp = TRUE;
2852 
2853                 stat = call_checkauth4(cs, req);
2854                 if (stat != NFS4_OK) {
2855                         VN_RELE(cs->vp);
2856                         cs->vp = oldvp;
2857                         return (stat);
2858                 }
2859         }
2860 
2861         /*
2862          * After various NFS checks, do a label check on the path
2863          * component. The label on this path should either be the
2864          * global zone's label or a zone's label. We are only
2865          * interested in the zone's label because exported files
2866          * in global zone is accessible (though read-only) to
2867          * clients. The exportability/visibility check is already
2868          * done before reaching this code.
2869          */
2870         if (is_system_labeled()) {
2871                 bslabel_t *clabel;
2872 
2873                 ASSERT(req->rq_label != NULL);
2874                 clabel = req->rq_label;
2875                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2876                     "got client label from request(1)", struct svc_req *, req);
2877 
2878                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2879                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2880                             cs->exi)) {
2881                                 error = EACCES;
2882                                 goto err_out;
2883                         }
2884                 } else {
2885                         /*
2886                          * We grant access to admin_low label clients
2887                          * only if the client is trusted, i.e. also
2888                          * running Solaris Trusted Extension.
2889                          */
2890                         struct sockaddr *ca;
2891                         int             addr_type;
2892                         void            *ipaddr;
2893                         tsol_tpc_t      *tp;
2894 
2895                         ca = (struct sockaddr *)svc_getrpccaller(
2896                             req->rq_xprt)->buf;
2897                         if (ca->sa_family == AF_INET) {
2898                                 addr_type = IPV4_VERSION;
2899                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2900                         } else if (ca->sa_family == AF_INET6) {
2901                                 addr_type = IPV6_VERSION;
2902                                 ipaddr = &((struct sockaddr_in6 *)
2903                                     ca)->sin6_addr;
2904                         }
2905                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2906                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2907                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2908                             SUN_CIPSO) {
2909                                 if (tp != NULL)
2910                                         TPC_RELE(tp);
2911                                 error = EACCES;
2912                                 goto err_out;
2913                         }
2914                         TPC_RELE(tp);
2915                 }
2916         }
2917 
2918         error = makefh4(&cs->fh, vp, cs->exi);
2919 
2920 err_out:
2921         if (error) {
2922                 if (is_newvp) {
2923                         VN_RELE(cs->vp);
2924                         cs->vp = oldvp;
2925                 } else
2926                         VN_RELE(vp);
2927                 return (puterrno4(error));
2928         }
2929 
2930         if (!is_newvp) {
2931                 if (cs->vp)
2932                         VN_RELE(cs->vp);
2933                 cs->vp = vp;
2934         } else if (oldvp)
2935                 VN_RELE(oldvp);
2936 
2937         /*
2938          * if did lookup on attrdir and didn't lookup .., set named
2939          * attr fh flag
2940          */
2941         if (attrdir && ! dotdot)
2942                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2943 
2944         /* Assume false for now, open proc will set this */
2945         cs->mandlock = FALSE;
2946 
2947         return (NFS4_OK);
2948 }
2949 
2950 /* ARGSUSED */
2951 static void
2952 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2953     struct compound_state *cs)
2954 {
2955         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2956         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2957         char *nm;
2958         uint_t len;
2959         struct sockaddr *ca;
2960         char *name = NULL;
2961         nfsstat4 status;
2962 
2963         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2964             LOOKUP4args *, args);
2965 
2966         if (cs->vp == NULL) {
2967                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2968                 goto out;
2969         }
2970 
2971         if (cs->vp->v_type == VLNK) {
2972                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2973                 goto out;
2974         }
2975 
2976         if (cs->vp->v_type != VDIR) {
2977                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2978                 goto out;
2979         }
2980 
2981         status = utf8_dir_verify(&args->objname);
2982         if (status != NFS4_OK) {
2983                 *cs->statusp = resp->status = status;
2984                 goto out;
2985         }
2986 
2987         nm = utf8_to_str(&args->objname, &len, NULL);
2988         if (nm == NULL) {
2989                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2990                 goto out;
2991         }
2992 
2993         if (len > MAXNAMELEN) {
2994                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2995                 kmem_free(nm, len);
2996                 goto out;
2997         }
2998 
2999         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3000         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3001             MAXPATHLEN  + 1);
3002 
3003         if (name == NULL) {
3004                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3005                 kmem_free(nm, len);
3006                 goto out;
3007         }
3008 
3009         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3010 
3011         if (name != nm)
3012                 kmem_free(name, MAXPATHLEN + 1);
3013         kmem_free(nm, len);
3014 
3015 out:
3016         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3017             LOOKUP4res *, resp);
3018 }
3019 
3020 /* ARGSUSED */
3021 static void
3022 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3023     struct compound_state *cs)
3024 {
3025         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3026 
3027         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3028 
3029         if (cs->vp == NULL) {
3030                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3031                 goto out;
3032         }
3033 
3034         if (cs->vp->v_type != VDIR) {
3035                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3036                 goto out;
3037         }
3038 
3039         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3040 
3041         /*
3042          * From NFSV4 Specification, LOOKUPP should not check for
3043          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3044          */
3045         if (resp->status == NFS4ERR_WRONGSEC) {
3046                 *cs->statusp = resp->status = NFS4_OK;
3047         }
3048 
3049 out:
3050         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3051             LOOKUPP4res *, resp);
3052 }
3053 
3054 
3055 /*ARGSUSED2*/
3056 static void
3057 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3058     struct compound_state *cs)
3059 {
3060         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3061         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3062         vnode_t         *avp = NULL;
3063         int             lookup_flags = LOOKUP_XATTR, error;
3064         int             exp_ro = 0;
3065 
3066         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3067             OPENATTR4args *, args);
3068 
3069         if (cs->vp == NULL) {
3070                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3071                 goto out;
3072         }
3073 
3074         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3075             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3076                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3077                 goto out;
3078         }
3079 
3080         /*
3081          * If file system supports passing ACE mask to VOP_ACCESS then
3082          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3083          */
3084 
3085         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3086                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3087                     V_ACE_MASK, cs->cr, NULL);
3088         else
3089                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3090                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3091                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3092 
3093         if (error) {
3094                 *cs->statusp = resp->status = puterrno4(EACCES);
3095                 goto out;
3096         }
3097 
3098         /*
3099          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3100          * the file system is exported read-only -- regardless of
3101          * createdir flag.  Otherwise the attrdir would be created
3102          * (assuming server fs isn't mounted readonly locally).  If
3103          * VOP_LOOKUP returns ENOENT in this case, the error will
3104          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3105          * because specfs has no VOP_LOOKUP op, so the macro would
3106          * return ENOSYS.  EINVAL is returned by all (current)
3107          * Solaris file system implementations when any of their
3108          * restrictions are violated (xattr(dir) can't have xattrdir).
3109          * Returning NOTSUPP is more appropriate in this case
3110          * because the object will never be able to have an attrdir.
3111          */
3112         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3113                 lookup_flags |= CREATE_XATTR_DIR;
3114 
3115         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3116             NULL, NULL, NULL);
3117 
3118         if (error) {
3119                 if (error == ENOENT && args->createdir && exp_ro)
3120                         *cs->statusp = resp->status = puterrno4(EROFS);
3121                 else if (error == EINVAL || error == ENOSYS)
3122                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3123                 else
3124                         *cs->statusp = resp->status = puterrno4(error);
3125                 goto out;
3126         }
3127 
3128         ASSERT(avp->v_flag & V_XATTRDIR);
3129 
3130         error = makefh4(&cs->fh, avp, cs->exi);
3131 
3132         if (error) {
3133                 VN_RELE(avp);
3134                 *cs->statusp = resp->status = puterrno4(error);
3135                 goto out;
3136         }
3137 
3138         VN_RELE(cs->vp);
3139         cs->vp = avp;
3140 
3141         /*
3142          * There is no requirement for an attrdir fh flag
3143          * because the attrdir has a vnode flag to distinguish
3144          * it from regular (non-xattr) directories.  The
3145          * FH4_ATTRDIR flag is set for future sanity checks.
3146          */
3147         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3148         *cs->statusp = resp->status = NFS4_OK;
3149 
3150 out:
3151         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3152             OPENATTR4res *, resp);
3153 }
3154 
3155 static int
3156 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3157     caller_context_t *ct)
3158 {
3159         int error;
3160         int i;
3161         clock_t delaytime;
3162 
3163         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3164 
3165         /*
3166          * Don't block on mandatory locks. If this routine returns
3167          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3168          */
3169         uio->uio_fmode = FNONBLOCK;
3170 
3171         for (i = 0; i < rfs4_maxlock_tries; i++) {
3172 
3173 
3174                 if (direction == FREAD) {
3175                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3176                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3177                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3178                 } else {
3179                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3180                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3181                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3182                 }
3183 
3184                 if (error != EAGAIN)
3185                         break;
3186 
3187                 if (i < rfs4_maxlock_tries - 1) {
3188                         delay(delaytime);
3189                         delaytime *= 2;
3190                 }
3191         }
3192 
3193         return (error);
3194 }
3195 
3196 /* ARGSUSED */
3197 static void
3198 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3199     struct compound_state *cs)
3200 {
3201         READ4args *args = &argop->nfs_argop4_u.opread;
3202         READ4res *resp = &resop->nfs_resop4_u.opread;
3203         int error;
3204         int verror;
3205         vnode_t *vp;
3206         struct vattr va;
3207         struct iovec iov, *iovp = NULL;
3208         int iovcnt;
3209         struct uio uio;
3210         u_offset_t offset;
3211         bool_t *deleg = &cs->deleg;
3212         nfsstat4 stat;
3213         int in_crit = 0;
3214         mblk_t *mp = NULL;
3215         int alloc_err = 0;
3216         int rdma_used = 0;
3217         int loaned_buffers;
3218         caller_context_t ct;
3219         struct uio *uiop;
3220 
3221         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3222             READ4args, args);
3223 
3224         vp = cs->vp;
3225         if (vp == NULL) {
3226                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3227                 goto out;
3228         }
3229         if (cs->access == CS_ACCESS_DENIED) {
3230                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3231                 goto out;
3232         }
3233 
3234         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3235             deleg, TRUE, &ct)) != NFS4_OK) {
3236                 *cs->statusp = resp->status = stat;
3237                 goto out;
3238         }
3239 
3240         /*
3241          * Enter the critical region before calling VOP_RWLOCK
3242          * to avoid a deadlock with write requests.
3243          */
3244         if (nbl_need_check(vp)) {
3245                 nbl_start_crit(vp, RW_READER);
3246                 in_crit = 1;
3247                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3248                     &ct)) {
3249                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3250                         goto out;
3251                 }
3252         }
3253 
3254         if (args->wlist) {
3255                 if (args->count > clist_len(args->wlist)) {
3256                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3257                         goto out;
3258                 }
3259                 rdma_used = 1;
3260         }
3261 
3262         /* use loaned buffers for TCP */
3263         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3264 
3265         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3266         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3267 
3268         /*
3269          * If we can't get the attributes, then we can't do the
3270          * right access checking.  So, we'll fail the request.
3271          */
3272         if (verror) {
3273                 *cs->statusp = resp->status = puterrno4(verror);
3274                 goto out;
3275         }
3276 
3277         if (vp->v_type != VREG) {
3278                 *cs->statusp = resp->status =
3279                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3280                 goto out;
3281         }
3282 
3283         if (crgetuid(cs->cr) != va.va_uid &&
3284             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3285             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3286                 *cs->statusp = resp->status = puterrno4(error);
3287                 goto out;
3288         }
3289 
3290         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3291                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3292                 goto out;
3293         }
3294 
3295         offset = args->offset;
3296         if (offset >= va.va_size) {
3297                 *cs->statusp = resp->status = NFS4_OK;
3298                 resp->eof = TRUE;
3299                 resp->data_len = 0;
3300                 resp->data_val = NULL;
3301                 resp->mblk = NULL;
3302                 /* RDMA */
3303                 resp->wlist = args->wlist;
3304                 resp->wlist_len = resp->data_len;
3305                 *cs->statusp = resp->status = NFS4_OK;
3306                 if (resp->wlist)
3307                         clist_zero_len(resp->wlist);
3308                 goto out;
3309         }
3310 
3311         if (args->count == 0) {
3312                 *cs->statusp = resp->status = NFS4_OK;
3313                 resp->eof = FALSE;
3314                 resp->data_len = 0;
3315                 resp->data_val = NULL;
3316                 resp->mblk = NULL;
3317                 /* RDMA */
3318                 resp->wlist = args->wlist;
3319                 resp->wlist_len = resp->data_len;
3320                 if (resp->wlist)
3321                         clist_zero_len(resp->wlist);
3322                 goto out;
3323         }
3324 
3325         /*
3326          * Do not allocate memory more than maximum allowed
3327          * transfer size
3328          */
3329         if (args->count > rfs4_tsize(req))
3330                 args->count = rfs4_tsize(req);
3331 
3332         if (loaned_buffers) {
3333                 uiop = (uio_t *)rfs_setup_xuio(vp);
3334                 ASSERT(uiop != NULL);
3335                 uiop->uio_segflg = UIO_SYSSPACE;
3336                 uiop->uio_loffset = args->offset;
3337                 uiop->uio_resid = args->count;
3338 
3339                 /* Jump to do the read if successful */
3340                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3341                         /*
3342                          * Need to hold the vnode until after VOP_RETZCBUF()
3343                          * is called.
3344                          */
3345                         VN_HOLD(vp);
3346                         goto doio_read;
3347                 }
3348 
3349                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3350                     uiop->uio_loffset, int, uiop->uio_resid);
3351 
3352                 uiop->uio_extflg = 0;
3353 
3354                 /* failure to setup for zero copy */
3355                 rfs_free_xuio((void *)uiop);
3356                 loaned_buffers = 0;
3357         }
3358 
3359         /*
3360          * If returning data via RDMA Write, then grab the chunk list. If we
3361          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3362          */
3363         if (rdma_used) {
3364                 mp = NULL;
3365                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3366                 uio.uio_iov = &iov;
3367                 uio.uio_iovcnt = 1;
3368         } else {
3369                 /*
3370                  * mp will contain the data to be sent out in the read reply.
3371                  * It will be freed after the reply has been sent.
3372                  */
3373                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3374                 ASSERT(mp != NULL);
3375                 ASSERT(alloc_err == 0);
3376                 uio.uio_iov = iovp;
3377                 uio.uio_iovcnt = iovcnt;
3378         }
3379 
3380         uio.uio_segflg = UIO_SYSSPACE;
3381         uio.uio_extflg = UIO_COPY_CACHED;
3382         uio.uio_loffset = args->offset;
3383         uio.uio_resid = args->count;
3384         uiop = &uio;
3385 
3386 doio_read:
3387         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3388 
3389         va.va_mask = AT_SIZE;
3390         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3391 
3392         if (error) {
3393                 if (mp)
3394                         freemsg(mp);
3395                 *cs->statusp = resp->status = puterrno4(error);
3396                 goto out;
3397         }
3398 
3399         /* make mblk using zc buffers */
3400         if (loaned_buffers) {
3401                 mp = uio_to_mblk(uiop);
3402                 ASSERT(mp != NULL);
3403         }
3404 
3405         *cs->statusp = resp->status = NFS4_OK;
3406 
3407         ASSERT(uiop->uio_resid >= 0);
3408         resp->data_len = args->count - uiop->uio_resid;
3409         if (mp) {
3410                 resp->data_val = (char *)mp->b_datap->db_base;
3411                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3412         } else {
3413                 resp->data_val = (caddr_t)iov.iov_base;
3414         }
3415 
3416         resp->mblk = mp;
3417 
3418         if (!verror && offset + resp->data_len == va.va_size)
3419                 resp->eof = TRUE;
3420         else
3421                 resp->eof = FALSE;
3422 
3423         if (rdma_used) {
3424                 if (!rdma_setup_read_data4(args, resp)) {
3425                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3426                 }
3427         } else {
3428                 resp->wlist = NULL;
3429         }
3430 
3431 out:
3432         if (in_crit)
3433                 nbl_end_crit(vp);
3434 
3435         if (iovp != NULL)
3436                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3437 
3438         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3439             READ4res *, resp);
3440 }
3441 
3442 static void
3443 rfs4_op_read_free(nfs_resop4 *resop)
3444 {
3445         READ4res        *resp = &resop->nfs_resop4_u.opread;
3446 
3447         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3448                 freemsg(resp->mblk);
3449                 resp->mblk = NULL;
3450                 resp->data_val = NULL;
3451                 resp->data_len = 0;
3452         }
3453 }
3454 
3455 static void
3456 rfs4_op_readdir_free(nfs_resop4 * resop)
3457 {
3458         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3459 
3460         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3461                 freeb(resp->mblk);
3462                 resp->mblk = NULL;
3463                 resp->data_len = 0;
3464         }
3465 }
3466 
3467 
3468 /* ARGSUSED */
3469 static void
3470 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3471     struct compound_state *cs)
3472 {
3473         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3474         int             error;
3475         vnode_t         *vp;
3476         struct exportinfo *exi, *sav_exi;
3477         nfs_fh4_fmt_t   *fh_fmtp;
3478         nfs_export_t *ne = nfs_get_export();
3479 
3480         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3481 
3482         if (cs->vp) {
3483                 VN_RELE(cs->vp);
3484                 cs->vp = NULL;
3485         }
3486 
3487         if (cs->cr)
3488                 crfree(cs->cr);
3489 
3490         cs->cr = crdup(cs->basecr);
3491 
3492         vp = ne->exi_public->exi_vp;
3493         if (vp == NULL) {
3494                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3495                 goto out;
3496         }
3497 
3498         error = makefh4(&cs->fh, vp, ne->exi_public);
3499         if (error != 0) {
3500                 *cs->statusp = resp->status = puterrno4(error);
3501                 goto out;
3502         }
3503         sav_exi = cs->exi;
3504         if (ne->exi_public == ne->exi_root) {
3505                 /*
3506                  * No filesystem is actually shared public, so we default
3507                  * to exi_root. In this case, we must check whether root
3508                  * is exported.
3509                  */
3510                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3511 
3512                 /*
3513                  * if root filesystem is exported, the exportinfo struct that we
3514                  * should use is what checkexport4 returns, because root_exi is
3515                  * actually a mostly empty struct.
3516                  */
3517                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3518                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3519                 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3520         } else {
3521                 /*
3522                  * it's a properly shared filesystem
3523                  */
3524                 cs->exi = ne->exi_public;
3525         }
3526 
3527         if (is_system_labeled()) {
3528                 bslabel_t *clabel;
3529 
3530                 ASSERT(req->rq_label != NULL);
3531                 clabel = req->rq_label;
3532                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3533                     "got client label from request(1)",
3534                     struct svc_req *, req);
3535                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3536                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3537                             cs->exi)) {
3538                                 *cs->statusp = resp->status =
3539                                     NFS4ERR_SERVERFAULT;
3540                                 goto out;
3541                         }
3542                 }
3543         }
3544 
3545         VN_HOLD(vp);
3546         cs->vp = vp;
3547 
3548         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3549                 VN_RELE(cs->vp);
3550                 cs->vp = NULL;
3551                 cs->exi = sav_exi;
3552                 goto out;
3553         }
3554 
3555         *cs->statusp = resp->status = NFS4_OK;
3556 out:
3557         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3558             PUTPUBFH4res *, resp);
3559 }
3560 
3561 /*
3562  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3563  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3564  * or joe have restrictive search permissions, then we shouldn't let
3565  * the client get a file handle. This is easy to enforce. However, we
3566  * don't know what security flavor should be used until we resolve the
3567  * path name. Another complication is uid mapping. If root is
3568  * the user, then it will be mapped to the anonymous user by default,
3569  * but we won't know that till we've resolved the path name. And we won't
3570  * know what the anonymous user is.
3571  * Luckily, SECINFO is specified to take a full filename.
3572  * So what we will have to in rfs4_op_lookup is check that flavor of
3573  * the target object matches that of the request, and if root was the
3574  * caller, check for the root= and anon= options, and if necessary,
3575  * repeat the lookup using the right cred_t. But that's not done yet.
3576  */
3577 /* ARGSUSED */
3578 static void
3579 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3580     struct compound_state *cs)
3581 {
3582         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3583         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3584         nfs_fh4_fmt_t *fh_fmtp;
3585 
3586         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3587             PUTFH4args *, args);
3588 
3589         if (cs->vp) {
3590                 VN_RELE(cs->vp);
3591                 cs->vp = NULL;
3592         }
3593 
3594         if (cs->cr) {
3595                 crfree(cs->cr);
3596                 cs->cr = NULL;
3597         }
3598 
3599 
3600         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3601                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3602                 goto out;
3603         }
3604 
3605         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3606         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3607             NULL);
3608 
3609         if (cs->exi == NULL) {
3610                 *cs->statusp = resp->status = NFS4ERR_STALE;
3611                 goto out;
3612         }
3613 
3614         cs->cr = crdup(cs->basecr);
3615 
3616         ASSERT(cs->cr != NULL);
3617 
3618         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3619                 *cs->statusp = resp->status;
3620                 goto out;
3621         }
3622 
3623         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3624                 VN_RELE(cs->vp);
3625                 cs->vp = NULL;
3626                 goto out;
3627         }
3628 
3629         nfs_fh4_copy(&args->object, &cs->fh);
3630         *cs->statusp = resp->status = NFS4_OK;
3631         cs->deleg = FALSE;
3632 
3633 out:
3634         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3635             PUTFH4res *, resp);
3636 }
3637 
3638 /* ARGSUSED */
3639 static void
3640 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3641     struct compound_state *cs)
3642 {
3643         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3644         int error;
3645         fid_t fid;
3646         struct exportinfo *exi, *sav_exi;
3647 
3648         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3649 
3650         if (cs->vp) {
3651                 VN_RELE(cs->vp);
3652                 cs->vp = NULL;
3653         }
3654 
3655         if (cs->cr)
3656                 crfree(cs->cr);
3657 
3658         cs->cr = crdup(cs->basecr);
3659 
3660         /*
3661          * Using rootdir, the system root vnode,
3662          * get its fid.
3663          */
3664         bzero(&fid, sizeof (fid));
3665         fid.fid_len = MAXFIDSZ;
3666         error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3667         if (error != 0) {
3668                 *cs->statusp = resp->status = puterrno4(error);
3669                 goto out;
3670         }
3671 
3672         /*
3673          * Then use the root fsid & fid it to find out if it's exported
3674          *
3675          * If the server root isn't exported directly, then
3676          * it should at least be a pseudo export based on
3677          * one or more exports further down in the server's
3678          * file tree.
3679          */
3680         exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3681         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3682                 NFS4_DEBUG(rfs4_debug,
3683                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3684                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3685                 goto out;
3686         }
3687 
3688         /*
3689          * Now make a filehandle based on the root
3690          * export and root vnode.
3691          */
3692         error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3693         if (error != 0) {
3694                 *cs->statusp = resp->status = puterrno4(error);
3695                 goto out;
3696         }
3697 
3698         sav_exi = cs->exi;
3699         cs->exi = exi;
3700 
3701         VN_HOLD(ZONE_ROOTVP());
3702         cs->vp = ZONE_ROOTVP();
3703 
3704         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3705                 VN_RELE(cs->vp);
3706                 cs->vp = NULL;
3707                 cs->exi = sav_exi;
3708                 goto out;
3709         }
3710 
3711         *cs->statusp = resp->status = NFS4_OK;
3712         cs->deleg = FALSE;
3713 out:
3714         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3715             PUTROOTFH4res *, resp);
3716 }
3717 
3718 /*
3719  * readlink: args: CURRENT_FH.
3720  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3721  */
3722 
3723 /* ARGSUSED */
3724 static void
3725 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3726     struct compound_state *cs)
3727 {
3728         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3729         int error;
3730         vnode_t *vp;
3731         struct iovec iov;
3732         struct vattr va;
3733         struct uio uio;
3734         char *data;
3735         struct sockaddr *ca;
3736         char *name = NULL;
3737         int is_referral;
3738 
3739         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3740 
3741         /* CURRENT_FH: directory */
3742         vp = cs->vp;
3743         if (vp == NULL) {
3744                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3745                 goto out;
3746         }
3747 
3748         if (cs->access == CS_ACCESS_DENIED) {
3749                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3750                 goto out;
3751         }
3752 
3753         /* Is it a referral? */
3754         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3755 
3756                 is_referral = 1;
3757 
3758         } else {
3759 
3760                 is_referral = 0;
3761 
3762                 if (vp->v_type == VDIR) {
3763                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3764                         goto out;
3765                 }
3766 
3767                 if (vp->v_type != VLNK) {
3768                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3769                         goto out;
3770                 }
3771 
3772         }
3773 
3774         va.va_mask = AT_MODE;
3775         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3776         if (error) {
3777                 *cs->statusp = resp->status = puterrno4(error);
3778                 goto out;
3779         }
3780 
3781         if (MANDLOCK(vp, va.va_mode)) {
3782                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3783                 goto out;
3784         }
3785 
3786         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3787 
3788         if (is_referral) {
3789                 char *s;
3790                 size_t strsz;
3791 
3792                 /* Get an artificial symlink based on a referral */
3793                 s = build_symlink(vp, cs->cr, &strsz);
3794                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3795                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3796                     vnode_t *, vp, char *, s);
3797                 if (s == NULL)
3798                         error = EINVAL;
3799                 else {
3800                         error = 0;
3801                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3802                         kmem_free(s, strsz);
3803                 }
3804 
3805         } else {
3806 
3807                 iov.iov_base = data;
3808                 iov.iov_len = MAXPATHLEN;
3809                 uio.uio_iov = &iov;
3810                 uio.uio_iovcnt = 1;
3811                 uio.uio_segflg = UIO_SYSSPACE;
3812                 uio.uio_extflg = UIO_COPY_CACHED;
3813                 uio.uio_loffset = 0;
3814                 uio.uio_resid = MAXPATHLEN;
3815 
3816                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3817 
3818                 if (!error)
3819                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3820         }
3821 
3822         if (error) {
3823                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3824                 *cs->statusp = resp->status = puterrno4(error);
3825                 goto out;
3826         }
3827 
3828         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3829         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3830             MAXPATHLEN  + 1);
3831 
3832         if (name == NULL) {
3833                 /*
3834                  * Even though the conversion failed, we return
3835                  * something. We just don't translate it.
3836                  */
3837                 name = data;
3838         }
3839 
3840         /*
3841          * treat link name as data
3842          */
3843         (void) str_to_utf8(name, (utf8string *)&resp->link);
3844 
3845         if (name != data)
3846                 kmem_free(name, MAXPATHLEN + 1);
3847         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3848         *cs->statusp = resp->status = NFS4_OK;
3849 
3850 out:
3851         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3852             READLINK4res *, resp);
3853 }
3854 
3855 static void
3856 rfs4_op_readlink_free(nfs_resop4 *resop)
3857 {
3858         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3859         utf8string *symlink = (utf8string *)&resp->link;
3860 
3861         if (symlink->utf8string_val) {
3862                 UTF8STRING_FREE(*symlink)
3863         }
3864 }
3865 
3866 /*
3867  * release_lockowner:
3868  *      Release any state associated with the supplied
3869  *      lockowner. Note if any lo_state is holding locks we will not
3870  *      rele that lo_state and thus the lockowner will not be destroyed.
3871  *      A client using lock after the lock owner stateid has been released
3872  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3873  *      to reissue the lock with new_lock_owner set to TRUE.
3874  *      args: lock_owner
3875  *      res:  status
3876  */
3877 /* ARGSUSED */
3878 static void
3879 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3880     struct svc_req *req, struct compound_state *cs)
3881 {
3882         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3883         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3884         rfs4_lockowner_t *lo;
3885         rfs4_openowner_t *oo;
3886         rfs4_state_t *sp;
3887         rfs4_lo_state_t *lsp;
3888         rfs4_client_t *cp;
3889         bool_t create = FALSE;
3890         locklist_t *llist;
3891         sysid_t sysid;
3892 
3893         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3894             cs, RELEASE_LOCKOWNER4args *, ap);
3895 
3896         /* Make sure there is a clientid around for this request */
3897         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3898 
3899         if (cp == NULL) {
3900                 *cs->statusp = resp->status =
3901                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3902                 goto out;
3903         }
3904         rfs4_client_rele(cp);
3905 
3906         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3907         if (lo == NULL) {
3908                 *cs->statusp = resp->status = NFS4_OK;
3909                 goto out;
3910         }
3911         ASSERT(lo->rl_client != NULL);
3912 
3913         /*
3914          * Check for EXPIRED client. If so will reap state with in a lease
3915          * period or on next set_clientid_confirm step
3916          */
3917         if (rfs4_lease_expired(lo->rl_client)) {
3918                 rfs4_lockowner_rele(lo);
3919                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3920                 goto out;
3921         }
3922 
3923         /*
3924          * If no sysid has been assigned, then no locks exist; just return.
3925          */
3926         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3927         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3928                 rfs4_lockowner_rele(lo);
3929                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3930                 goto out;
3931         }
3932 
3933         sysid = lo->rl_client->rc_sysidt;
3934         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3935 
3936         /*
3937          * Mark the lockowner invalid.
3938          */
3939         rfs4_dbe_hide(lo->rl_dbe);
3940 
3941         /*
3942          * sysid-pid pair should now not be used since the lockowner is
3943          * invalid. If the client were to instantiate the lockowner again
3944          * it would be assigned a new pid. Thus we can get the list of
3945          * current locks.
3946          */
3947 
3948         llist = flk_get_active_locks(sysid, lo->rl_pid);
3949         /* If we are still holding locks fail */
3950         if (llist != NULL) {
3951 
3952                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3953 
3954                 flk_free_locklist(llist);
3955                 /*
3956                  * We need to unhide the lockowner so the client can
3957                  * try it again. The bad thing here is if the client
3958                  * has a logic error that took it here in the first place
3959                  * they probably have lost accounting of the locks that it
3960                  * is holding. So we may have dangling state until the
3961                  * open owner state is reaped via close. One scenario
3962                  * that could possibly occur is that the client has
3963                  * sent the unlock request(s) in separate threads
3964                  * and has not waited for the replies before sending the
3965                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3966                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3967                  * reissuing the request.
3968                  */
3969                 rfs4_dbe_unhide(lo->rl_dbe);
3970                 rfs4_lockowner_rele(lo);
3971                 goto out;
3972         }
3973 
3974         /*
3975          * For the corresponding client we need to check each open
3976          * owner for any opens that have lockowner state associated
3977          * with this lockowner.
3978          */
3979 
3980         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3981         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3982             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3983 
3984                 rfs4_dbe_lock(oo->ro_dbe);
3985                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3986                     sp = list_next(&oo->ro_statelist, sp)) {
3987 
3988                         rfs4_dbe_lock(sp->rs_dbe);
3989                         for (lsp = list_head(&sp->rs_lostatelist);
3990                             lsp != NULL;
3991                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
3992                                 if (lsp->rls_locker == lo) {
3993                                         rfs4_dbe_lock(lsp->rls_dbe);
3994                                         rfs4_dbe_invalidate(lsp->rls_dbe);
3995                                         rfs4_dbe_unlock(lsp->rls_dbe);
3996                                 }
3997                         }
3998                         rfs4_dbe_unlock(sp->rs_dbe);
3999                 }
4000                 rfs4_dbe_unlock(oo->ro_dbe);
4001         }
4002         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4003 
4004         rfs4_lockowner_rele(lo);
4005 
4006         *cs->statusp = resp->status = NFS4_OK;
4007 
4008 out:
4009         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4010             cs, RELEASE_LOCKOWNER4res *, resp);
4011 }
4012 
4013 /*
4014  * short utility function to lookup a file and recall the delegation
4015  */
4016 static rfs4_file_t *
4017 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4018     int *lkup_error, cred_t *cr)
4019 {
4020         vnode_t *vp;
4021         rfs4_file_t *fp = NULL;
4022         bool_t fcreate = FALSE;
4023         int error;
4024 
4025         if (vpp)
4026                 *vpp = NULL;
4027 
4028         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4029             NULL)) == 0) {
4030                 if (vp->v_type == VREG)
4031                         fp = rfs4_findfile(vp, NULL, &fcreate);
4032                 if (vpp)
4033                         *vpp = vp;
4034                 else
4035                         VN_RELE(vp);
4036         }
4037 
4038         if (lkup_error)
4039                 *lkup_error = error;
4040 
4041         return (fp);
4042 }
4043 
4044 /*
4045  * remove: args: CURRENT_FH: directory; name.
4046  *      res: status. If success - CURRENT_FH unchanged, return change_info
4047  *              for directory.
4048  */
4049 /* ARGSUSED */
4050 static void
4051 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4052     struct compound_state *cs)
4053 {
4054         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4055         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4056         int error;
4057         vnode_t *dvp, *vp;
4058         struct vattr bdva, idva, adva;
4059         char *nm;
4060         uint_t len;
4061         rfs4_file_t *fp;
4062         int in_crit = 0;
4063         bslabel_t *clabel;
4064         struct sockaddr *ca;
4065         char *name = NULL;
4066         nfsstat4 status;
4067 
4068         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4069             REMOVE4args *, args);
4070 
4071         /* CURRENT_FH: directory */
4072         dvp = cs->vp;
4073         if (dvp == NULL) {
4074                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4075                 goto out;
4076         }
4077 
4078         if (cs->access == CS_ACCESS_DENIED) {
4079                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4080                 goto out;
4081         }
4082 
4083         /*
4084          * If there is an unshared filesystem mounted on this vnode,
4085          * Do not allow to remove anything in this directory.
4086          */
4087         if (vn_ismntpt(dvp)) {
4088                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4089                 goto out;
4090         }
4091 
4092         if (dvp->v_type != VDIR) {
4093                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4094                 goto out;
4095         }
4096 
4097         status = utf8_dir_verify(&args->target);
4098         if (status != NFS4_OK) {
4099                 *cs->statusp = resp->status = status;
4100                 goto out;
4101         }
4102 
4103         /*
4104          * Lookup the file so that we can check if it's a directory
4105          */
4106         nm = utf8_to_fn(&args->target, &len, NULL);
4107         if (nm == NULL) {
4108                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4109                 goto out;
4110         }
4111 
4112         if (len > MAXNAMELEN) {
4113                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4114                 kmem_free(nm, len);
4115                 goto out;
4116         }
4117 
4118         if (rdonly4(req, cs)) {
4119                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4120                 kmem_free(nm, len);
4121                 goto out;
4122         }
4123 
4124         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4125         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4126             MAXPATHLEN  + 1);
4127 
4128         if (name == NULL) {
4129                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4130                 kmem_free(nm, len);
4131                 goto out;
4132         }
4133 
4134         /*
4135          * Lookup the file to determine type and while we are see if
4136          * there is a file struct around and check for delegation.
4137          * We don't need to acquire va_seq before this lookup, if
4138          * it causes an update, cinfo.before will not match, which will
4139          * trigger a cache flush even if atomic is TRUE.
4140          */
4141         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4142                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4143                     NULL)) {
4144                         VN_RELE(vp);
4145                         rfs4_file_rele(fp);
4146                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4147                         if (nm != name)
4148                                 kmem_free(name, MAXPATHLEN + 1);
4149                         kmem_free(nm, len);
4150                         goto out;
4151                 }
4152         }
4153 
4154         /* Didn't find anything to remove */
4155         if (vp == NULL) {
4156                 *cs->statusp = resp->status = error;
4157                 if (nm != name)
4158                         kmem_free(name, MAXPATHLEN + 1);
4159                 kmem_free(nm, len);
4160                 goto out;
4161         }
4162 
4163         if (nbl_need_check(vp)) {
4164                 nbl_start_crit(vp, RW_READER);
4165                 in_crit = 1;
4166                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4167                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4168                         if (nm != name)
4169                                 kmem_free(name, MAXPATHLEN + 1);
4170                         kmem_free(nm, len);
4171                         nbl_end_crit(vp);
4172                         VN_RELE(vp);
4173                         if (fp) {
4174                                 rfs4_clear_dont_grant(fp);
4175                                 rfs4_file_rele(fp);
4176                         }
4177                         goto out;
4178                 }
4179         }
4180 
4181         /* check label before allowing removal */
4182         if (is_system_labeled()) {
4183                 ASSERT(req->rq_label != NULL);
4184                 clabel = req->rq_label;
4185                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4186                     "got client label from request(1)",
4187                     struct svc_req *, req);
4188                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4189                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4190                             cs->exi)) {
4191                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4192                                 if (name != nm)
4193                                         kmem_free(name, MAXPATHLEN + 1);
4194                                 kmem_free(nm, len);
4195                                 if (in_crit)
4196                                         nbl_end_crit(vp);
4197                                 VN_RELE(vp);
4198                                 if (fp) {
4199                                         rfs4_clear_dont_grant(fp);
4200                                         rfs4_file_rele(fp);
4201                                 }
4202                                 goto out;
4203                         }
4204                 }
4205         }
4206 
4207         /* Get dir "before" change value */
4208         bdva.va_mask = AT_CTIME|AT_SEQ;
4209         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4210         if (error) {
4211                 *cs->statusp = resp->status = puterrno4(error);
4212                 if (nm != name)
4213                         kmem_free(name, MAXPATHLEN + 1);
4214                 kmem_free(nm, len);
4215                 if (in_crit)
4216                         nbl_end_crit(vp);
4217                 VN_RELE(vp);
4218                 if (fp) {
4219                         rfs4_clear_dont_grant(fp);
4220                         rfs4_file_rele(fp);
4221                 }
4222                 goto out;
4223         }
4224         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4225 
4226         /* Actually do the REMOVE operation */
4227         if (vp->v_type == VDIR) {
4228                 /*
4229                  * Can't remove a directory that has a mounted-on filesystem.
4230                  */
4231                 if (vn_ismntpt(vp)) {
4232                         error = EACCES;
4233                 } else {
4234                         /*
4235                          * System V defines rmdir to return EEXIST,
4236                          * not ENOTEMPTY, if the directory is not
4237                          * empty.  A System V NFS server needs to map
4238                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4239                          * transmit over the wire.
4240                          */
4241                         if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4242                             NULL, 0)) == EEXIST)
4243                                 error = ENOTEMPTY;
4244                 }
4245         } else {
4246                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4247                     fp != NULL) {
4248                         struct vattr va;
4249                         vnode_t *tvp;
4250 
4251                         rfs4_dbe_lock(fp->rf_dbe);
4252                         tvp = fp->rf_vp;
4253                         if (tvp)
4254                                 VN_HOLD(tvp);
4255                         rfs4_dbe_unlock(fp->rf_dbe);
4256 
4257                         if (tvp) {
4258                                 /*
4259                                  * This is va_seq safe because we are not
4260                                  * manipulating dvp.
4261                                  */
4262                                 va.va_mask = AT_NLINK;
4263                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4264                                     va.va_nlink == 0) {
4265                                         /* Remove state on file remove */
4266                                         if (in_crit) {
4267                                                 nbl_end_crit(vp);
4268                                                 in_crit = 0;
4269                                         }
4270                                         rfs4_close_all_state(fp);
4271                                 }
4272                                 VN_RELE(tvp);
4273                         }
4274                 }
4275         }
4276 
4277         if (in_crit)
4278                 nbl_end_crit(vp);
4279         VN_RELE(vp);
4280 
4281         if (fp) {
4282                 rfs4_clear_dont_grant(fp);
4283                 rfs4_file_rele(fp);
4284         }
4285         if (nm != name)
4286                 kmem_free(name, MAXPATHLEN + 1);
4287         kmem_free(nm, len);
4288 
4289         if (error) {
4290                 *cs->statusp = resp->status = puterrno4(error);
4291                 goto out;
4292         }
4293 
4294         /*
4295          * Get the initial "after" sequence number, if it fails, set to zero
4296          */
4297         idva.va_mask = AT_SEQ;
4298         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4299                 idva.va_seq = 0;
4300 
4301         /*
4302          * Force modified data and metadata out to stable storage.
4303          */
4304         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4305 
4306         /*
4307          * Get "after" change value, if it fails, simply return the
4308          * before value.
4309          */
4310         adva.va_mask = AT_CTIME|AT_SEQ;
4311         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4312                 adva.va_ctime = bdva.va_ctime;
4313                 adva.va_seq = 0;
4314         }
4315 
4316         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4317 
4318         /*
4319          * The cinfo.atomic = TRUE only if we have
4320          * non-zero va_seq's, and it has incremented by exactly one
4321          * during the VOP_REMOVE/RMDIR and it didn't change during
4322          * the VOP_FSYNC.
4323          */
4324         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4325             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4326                 resp->cinfo.atomic = TRUE;
4327         else
4328                 resp->cinfo.atomic = FALSE;
4329 
4330         *cs->statusp = resp->status = NFS4_OK;
4331 
4332 out:
4333         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4334             REMOVE4res *, resp);
4335 }
4336 
4337 /*
4338  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4339  *              oldname and newname.
4340  *      res: status. If success - CURRENT_FH unchanged, return change_info
4341  *              for both from and target directories.
4342  */
4343 /* ARGSUSED */
4344 static void
4345 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4346     struct compound_state *cs)
4347 {
4348         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4349         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4350         int error;
4351         vnode_t *odvp;
4352         vnode_t *ndvp;
4353         vnode_t *srcvp, *targvp, *tvp;
4354         struct vattr obdva, oidva, oadva;
4355         struct vattr nbdva, nidva, nadva;
4356         char *onm, *nnm;
4357         uint_t olen, nlen;
4358         rfs4_file_t *fp, *sfp;
4359         int in_crit_src, in_crit_targ;
4360         int fp_rele_grant_hold, sfp_rele_grant_hold;
4361         int unlinked;
4362         bslabel_t *clabel;
4363         struct sockaddr *ca;
4364         char *converted_onm = NULL;
4365         char *converted_nnm = NULL;
4366         nfsstat4 status;
4367 
4368         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4369             RENAME4args *, args);
4370 
4371         fp = sfp = NULL;
4372         srcvp = targvp = tvp = NULL;
4373         in_crit_src = in_crit_targ = 0;
4374         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4375         unlinked = 0;
4376 
4377         /* CURRENT_FH: target directory */
4378         ndvp = cs->vp;
4379         if (ndvp == NULL) {
4380                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4381                 goto out;
4382         }
4383 
4384         /* SAVED_FH: from directory */
4385         odvp = cs->saved_vp;
4386         if (odvp == NULL) {
4387                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4388                 goto out;
4389         }
4390 
4391         if (cs->access == CS_ACCESS_DENIED) {
4392                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4393                 goto out;
4394         }
4395 
4396         /*
4397          * If there is an unshared filesystem mounted on this vnode,
4398          * do not allow to rename objects in this directory.
4399          */
4400         if (vn_ismntpt(odvp)) {
4401                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4402                 goto out;
4403         }
4404 
4405         /*
4406          * If there is an unshared filesystem mounted on this vnode,
4407          * do not allow to rename to this directory.
4408          */
4409         if (vn_ismntpt(ndvp)) {
4410                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4411                 goto out;
4412         }
4413 
4414         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4415                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4416                 goto out;
4417         }
4418 
4419         if (cs->saved_exi != cs->exi) {
4420                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4421                 goto out;
4422         }
4423 
4424         status = utf8_dir_verify(&args->oldname);
4425         if (status != NFS4_OK) {
4426                 *cs->statusp = resp->status = status;
4427                 goto out;
4428         }
4429 
4430         status = utf8_dir_verify(&args->newname);
4431         if (status != NFS4_OK) {
4432                 *cs->statusp = resp->status = status;
4433                 goto out;
4434         }
4435 
4436         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4437         if (onm == NULL) {
4438                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4439                 goto out;
4440         }
4441         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4442         nlen = MAXPATHLEN + 1;
4443         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4444             nlen);
4445 
4446         if (converted_onm == NULL) {
4447                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4448                 kmem_free(onm, olen);
4449                 goto out;
4450         }
4451 
4452         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4453         if (nnm == NULL) {
4454                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4455                 if (onm != converted_onm)
4456                         kmem_free(converted_onm, MAXPATHLEN + 1);
4457                 kmem_free(onm, olen);
4458                 goto out;
4459         }
4460         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4461             MAXPATHLEN  + 1);
4462 
4463         if (converted_nnm == NULL) {
4464                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4465                 kmem_free(nnm, nlen);
4466                 nnm = NULL;
4467                 if (onm != converted_onm)
4468                         kmem_free(converted_onm, MAXPATHLEN + 1);
4469                 kmem_free(onm, olen);
4470                 goto out;
4471         }
4472 
4473 
4474         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4475                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4476                 kmem_free(onm, olen);
4477                 kmem_free(nnm, nlen);
4478                 goto out;
4479         }
4480 
4481 
4482         if (rdonly4(req, cs)) {
4483                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4484                 if (onm != converted_onm)
4485                         kmem_free(converted_onm, MAXPATHLEN + 1);
4486                 kmem_free(onm, olen);
4487                 if (nnm != converted_nnm)
4488                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4489                 kmem_free(nnm, nlen);
4490                 goto out;
4491         }
4492 
4493         /* check label of the target dir */
4494         if (is_system_labeled()) {
4495                 ASSERT(req->rq_label != NULL);
4496                 clabel = req->rq_label;
4497                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4498                     "got client label from request(1)",
4499                     struct svc_req *, req);
4500                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4501                         if (!do_rfs_label_check(clabel, ndvp,
4502                             EQUALITY_CHECK, cs->exi)) {
4503                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4504                                 goto err_out;
4505                         }
4506                 }
4507         }
4508 
4509         /*
4510          * Is the source a file and have a delegation?
4511          * We don't need to acquire va_seq before these lookups, if
4512          * it causes an update, cinfo.before will not match, which will
4513          * trigger a cache flush even if atomic is TRUE.
4514          */
4515         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4516             &error, cs->cr)) {
4517                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4518                     NULL)) {
4519                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4520                         goto err_out;
4521                 }
4522         }
4523 
4524         if (srcvp == NULL) {
4525                 *cs->statusp = resp->status = puterrno4(error);
4526                 if (onm != converted_onm)
4527                         kmem_free(converted_onm, MAXPATHLEN + 1);
4528                 kmem_free(onm, olen);
4529                 if (nnm != converted_nnm)
4530                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4531                 kmem_free(nnm, nlen);
4532                 goto out;
4533         }
4534 
4535         sfp_rele_grant_hold = 1;
4536 
4537         /* Does the destination exist and a file and have a delegation? */
4538         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4539             NULL, cs->cr)) {
4540                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4541                     NULL)) {
4542                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4543                         goto err_out;
4544                 }
4545         }
4546         fp_rele_grant_hold = 1;
4547 
4548         /* Check for NBMAND lock on both source and target */
4549         if (nbl_need_check(srcvp)) {
4550                 nbl_start_crit(srcvp, RW_READER);
4551                 in_crit_src = 1;
4552                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4553                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4554                         goto err_out;
4555                 }
4556         }
4557 
4558         if (targvp && nbl_need_check(targvp)) {
4559                 nbl_start_crit(targvp, RW_READER);
4560                 in_crit_targ = 1;
4561                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4562                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4563                         goto err_out;
4564                 }
4565         }
4566 
4567         /* Get source "before" change value */
4568         obdva.va_mask = AT_CTIME|AT_SEQ;
4569         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4570         if (!error) {
4571                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4572                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4573         }
4574         if (error) {
4575                 *cs->statusp = resp->status = puterrno4(error);
4576                 goto err_out;
4577         }
4578 
4579         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4580         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4581 
4582         error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4583             NULL, 0);
4584 
4585         /*
4586          * If target existed and was unlinked by VOP_RENAME, state will need
4587          * closed. To avoid deadlock, rfs4_close_all_state will be done after
4588          * any necessary nbl_end_crit on srcvp and tgtvp.
4589          */
4590         if (error == 0 && fp != NULL) {
4591                 rfs4_dbe_lock(fp->rf_dbe);
4592                 tvp = fp->rf_vp;
4593                 if (tvp)
4594                         VN_HOLD(tvp);
4595                 rfs4_dbe_unlock(fp->rf_dbe);
4596 
4597                 if (tvp) {
4598                         struct vattr va;
4599                         va.va_mask = AT_NLINK;
4600 
4601                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4602                             va.va_nlink == 0) {
4603                                 unlinked = 1;
4604 
4605                                 /* DEBUG data */
4606                                 if ((srcvp == targvp) || (tvp != targvp)) {
4607                                         cmn_err(CE_WARN, "rfs4_op_rename: "
4608                                             "srcvp %p, targvp: %p, tvp: %p",
4609                                             (void *)srcvp, (void *)targvp,
4610                                             (void *)tvp);
4611                                 }
4612                         } else {
4613                                 VN_RELE(tvp);
4614                         }
4615                 }
4616         }
4617         if (error == 0)
4618                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4619 
4620         if (in_crit_src)
4621                 nbl_end_crit(srcvp);
4622         if (srcvp)
4623                 VN_RELE(srcvp);
4624         if (in_crit_targ)
4625                 nbl_end_crit(targvp);
4626         if (targvp)
4627                 VN_RELE(targvp);
4628 
4629         if (unlinked) {
4630                 ASSERT(fp != NULL);
4631                 ASSERT(tvp != NULL);
4632 
4633                 /* DEBUG data */
4634                 if (RW_READ_HELD(&tvp->v_nbllock)) {
4635                         cmn_err(CE_WARN, "rfs4_op_rename: "
4636                             "RW_READ_HELD(%p)", (void *)tvp);
4637                 }
4638 
4639                 /* The file is gone and so should the state */
4640                 rfs4_close_all_state(fp);
4641                 VN_RELE(tvp);
4642         }
4643 
4644         if (sfp) {
4645                 rfs4_clear_dont_grant(sfp);
4646                 rfs4_file_rele(sfp);
4647         }
4648         if (fp) {
4649                 rfs4_clear_dont_grant(fp);
4650                 rfs4_file_rele(fp);
4651         }
4652 
4653         if (converted_onm != onm)
4654                 kmem_free(converted_onm, MAXPATHLEN + 1);
4655         kmem_free(onm, olen);
4656         if (converted_nnm != nnm)
4657                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4658         kmem_free(nnm, nlen);
4659 
4660         /*
4661          * Get the initial "after" sequence number, if it fails, set to zero
4662          */
4663         oidva.va_mask = AT_SEQ;
4664         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4665                 oidva.va_seq = 0;
4666 
4667         nidva.va_mask = AT_SEQ;
4668         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4669                 nidva.va_seq = 0;
4670 
4671         /*
4672          * Force modified data and metadata out to stable storage.
4673          */
4674         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4675         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4676 
4677         if (error) {
4678                 *cs->statusp = resp->status = puterrno4(error);
4679                 goto out;
4680         }
4681 
4682         /*
4683          * Get "after" change values, if it fails, simply return the
4684          * before value.
4685          */
4686         oadva.va_mask = AT_CTIME|AT_SEQ;
4687         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4688                 oadva.va_ctime = obdva.va_ctime;
4689                 oadva.va_seq = 0;
4690         }
4691 
4692         nadva.va_mask = AT_CTIME|AT_SEQ;
4693         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4694                 nadva.va_ctime = nbdva.va_ctime;
4695                 nadva.va_seq = 0;
4696         }
4697 
4698         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4699         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4700 
4701         /*
4702          * The cinfo.atomic = TRUE only if we have
4703          * non-zero va_seq's, and it has incremented by exactly one
4704          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4705          */
4706         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4707             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4708                 resp->source_cinfo.atomic = TRUE;
4709         else
4710                 resp->source_cinfo.atomic = FALSE;
4711 
4712         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4713             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4714                 resp->target_cinfo.atomic = TRUE;
4715         else
4716                 resp->target_cinfo.atomic = FALSE;
4717 
4718 #ifdef  VOLATILE_FH_TEST
4719         {
4720         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4721 
4722         /*
4723          * Add the renamed file handle to the volatile rename list
4724          */
4725         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4726                 /* file handles may expire on rename */
4727                 vnode_t *vp;
4728 
4729                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4730                 /*
4731                  * Already know that nnm will be a valid string
4732                  */
4733                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4734                     NULL, NULL, NULL);
4735                 kmem_free(nnm, nlen);
4736                 if (!error) {
4737                         add_volrnm_fh(cs->exi, vp);
4738                         VN_RELE(vp);
4739                 }
4740         }
4741         }
4742 #endif  /* VOLATILE_FH_TEST */
4743 
4744         *cs->statusp = resp->status = NFS4_OK;
4745 out:
4746         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4747             RENAME4res *, resp);
4748         return;
4749 
4750 err_out:
4751         if (onm != converted_onm)
4752                 kmem_free(converted_onm, MAXPATHLEN + 1);
4753         if (onm != NULL)
4754                 kmem_free(onm, olen);
4755         if (nnm != converted_nnm)
4756                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4757         if (nnm != NULL)
4758                 kmem_free(nnm, nlen);
4759 
4760         if (in_crit_src) nbl_end_crit(srcvp);
4761         if (in_crit_targ) nbl_end_crit(targvp);
4762         if (targvp) VN_RELE(targvp);
4763         if (srcvp) VN_RELE(srcvp);
4764         if (sfp) {
4765                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4766                 rfs4_file_rele(sfp);
4767         }
4768         if (fp) {
4769                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4770                 rfs4_file_rele(fp);
4771         }
4772 
4773         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4774             RENAME4res *, resp);
4775 }
4776 
4777 /* ARGSUSED */
4778 static void
4779 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4780     struct compound_state *cs)
4781 {
4782         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4783         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4784         rfs4_client_t *cp;
4785 
4786         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4787             RENEW4args *, args);
4788 
4789         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4790                 *cs->statusp = resp->status =
4791                     rfs4_check_clientid(&args->clientid, 0);
4792                 goto out;
4793         }
4794 
4795         if (rfs4_lease_expired(cp)) {
4796                 rfs4_client_rele(cp);
4797                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4798                 goto out;
4799         }
4800 
4801         rfs4_update_lease(cp);
4802 
4803         mutex_enter(cp->rc_cbinfo.cb_lock);
4804         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4805                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4806                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4807         } else {
4808                 *cs->statusp = resp->status = NFS4_OK;
4809         }
4810         mutex_exit(cp->rc_cbinfo.cb_lock);
4811 
4812         rfs4_client_rele(cp);
4813 
4814 out:
4815         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4816             RENEW4res *, resp);
4817 }
4818 
4819 /* ARGSUSED */
4820 static void
4821 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4822     struct compound_state *cs)
4823 {
4824         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4825 
4826         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4827 
4828         /* No need to check cs->access - we are not accessing any object */
4829         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4830                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4831                 goto out;
4832         }
4833         if (cs->vp != NULL) {
4834                 VN_RELE(cs->vp);
4835         }
4836         cs->vp = cs->saved_vp;
4837         cs->saved_vp = NULL;
4838         cs->exi = cs->saved_exi;
4839         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4840         *cs->statusp = resp->status = NFS4_OK;
4841         cs->deleg = FALSE;
4842 
4843 out:
4844         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4845             RESTOREFH4res *, resp);
4846 }
4847 
4848 /* ARGSUSED */
4849 static void
4850 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4851     struct compound_state *cs)
4852 {
4853         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4854 
4855         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4856 
4857         /* No need to check cs->access - we are not accessing any object */
4858         if (cs->vp == NULL) {
4859                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4860                 goto out;
4861         }
4862         if (cs->saved_vp != NULL) {
4863                 VN_RELE(cs->saved_vp);
4864         }
4865         cs->saved_vp = cs->vp;
4866         VN_HOLD(cs->saved_vp);
4867         cs->saved_exi = cs->exi;
4868         /*
4869          * since SAVEFH is fairly rare, don't alloc space for its fh
4870          * unless necessary.
4871          */
4872         if (cs->saved_fh.nfs_fh4_val == NULL) {
4873                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4874         }
4875         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4876         *cs->statusp = resp->status = NFS4_OK;
4877 
4878 out:
4879         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4880             SAVEFH4res *, resp);
4881 }
4882 
4883 /*
4884  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4885  * return the bitmap of attrs that were set successfully. It is also
4886  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4887  * always be called only after rfs4_do_set_attrs().
4888  *
4889  * Verify that the attributes are same as the expected ones. sargp->vap
4890  * and sargp->sbp contain the input attributes as translated from fattr4.
4891  *
4892  * This function verifies only the attrs that correspond to a vattr or
4893  * vfsstat struct. That is because of the extra step needed to get the
4894  * corresponding system structs. Other attributes have already been set or
4895  * verified by do_rfs4_set_attrs.
4896  *
4897  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4898  */
4899 static int
4900 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4901     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4902 {
4903         int error, ret_error = 0;
4904         int i, k;
4905         uint_t sva_mask = sargp->vap->va_mask;
4906         uint_t vbit;
4907         union nfs4_attr_u *na;
4908         uint8_t *amap;
4909         bool_t getsb = ntovp->vfsstat;
4910 
4911         if (sva_mask != 0) {
4912                 /*
4913                  * Okay to overwrite sargp->vap because we verify based
4914                  * on the incoming values.
4915                  */
4916                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4917                     sargp->cs->cr, NULL);
4918                 if (ret_error) {
4919                         if (resp == NULL)
4920                                 return (ret_error);
4921                         /*
4922                          * Must return bitmap of successful attrs
4923                          */
4924                         sva_mask = 0;   /* to prevent checking vap later */
4925                 } else {
4926                         /*
4927                          * Some file systems clobber va_mask. it is probably
4928                          * wrong of them to do so, nonethless we practice
4929                          * defensive coding.
4930                          * See bug id 4276830.
4931                          */
4932                         sargp->vap->va_mask = sva_mask;
4933                 }
4934         }
4935 
4936         if (getsb) {
4937                 /*
4938                  * Now get the superblock and loop on the bitmap, as there is
4939                  * no simple way of translating from superblock to bitmap4.
4940                  */
4941                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4942                 if (ret_error) {
4943                         if (resp == NULL)
4944                                 goto errout;
4945                         getsb = FALSE;
4946                 }
4947         }
4948 
4949         /*
4950          * Now loop and verify each attribute which getattr returned
4951          * whether it's the same as the input.
4952          */
4953         if (resp == NULL && !getsb && (sva_mask == 0))
4954                 goto errout;
4955 
4956         na = ntovp->na;
4957         amap = ntovp->amap;
4958         k = 0;
4959         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4960                 k = *amap;
4961                 ASSERT(nfs4_ntov_map[k].nval == k);
4962                 vbit = nfs4_ntov_map[k].vbit;
4963 
4964                 /*
4965                  * If vattr attribute but VOP_GETATTR failed, or it's
4966                  * superblock attribute but VFS_STATVFS failed, skip
4967                  */
4968                 if (vbit) {
4969                         if ((vbit & sva_mask) == 0)
4970                                 continue;
4971                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4972                         continue;
4973                 }
4974                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4975                 if (resp != NULL) {
4976                         if (error)
4977                                 ret_error = -1; /* not all match */
4978                         else    /* update response bitmap */
4979                                 *resp |= nfs4_ntov_map[k].fbit;
4980                         continue;
4981                 }
4982                 if (error) {
4983                         ret_error = -1; /* not all match */
4984                         break;
4985                 }
4986         }
4987 errout:
4988         return (ret_error);
4989 }
4990 
4991 /*
4992  * Decode the attribute to be set/verified. If the attr requires a sys op
4993  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4994  * call the sv_getit function for it, because the sys op hasn't yet been done.
4995  * Return 0 for success, error code if failed.
4996  *
4997  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4998  */
4999 static int
5000 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5001     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5002 {
5003         int error = 0;
5004         bool_t set_later;
5005 
5006         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5007 
5008         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5009                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5010                 /*
5011                  * don't verify yet if a vattr or sb dependent attr,
5012                  * because we don't have their sys values yet.
5013                  * Will be done later.
5014                  */
5015                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5016                         /*
5017                          * ACLs are a special case, since setting the MODE
5018                          * conflicts with setting the ACL.  We delay setting
5019                          * the ACL until all other attributes have been set.
5020                          * The ACL gets set in do_rfs4_op_setattr().
5021                          */
5022                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5023                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5024                                     sargp, nap);
5025                                 if (error) {
5026                                         xdr_free(nfs4_ntov_map[k].xfunc,
5027                                             (caddr_t)nap);
5028                                 }
5029                         }
5030                 }
5031         } else {
5032 #ifdef  DEBUG
5033                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5034                     "decoding attribute %d\n", k);
5035 #endif
5036                 error = EINVAL;
5037         }
5038         if (!error && resp_bval && !set_later) {
5039                 *resp_bval |= nfs4_ntov_map[k].fbit;
5040         }
5041 
5042         return (error);
5043 }
5044 
5045 /*
5046  * Set vattr based on incoming fattr4 attrs - used by setattr.
5047  * Set response mask. Ignore any values that are not writable vattr attrs.
5048  */
5049 static nfsstat4
5050 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5051     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5052     nfs4_attr_cmd_t cmd)
5053 {
5054         int error = 0;
5055         int i;
5056         char *attrs = fattrp->attrlist4;
5057         uint32_t attrslen = fattrp->attrlist4_len;
5058         XDR xdr;
5059         nfsstat4 status = NFS4_OK;
5060         vnode_t *vp = cs->vp;
5061         union nfs4_attr_u *na;
5062         uint8_t *amap;
5063 
5064 #ifndef lint
5065         /*
5066          * Make sure that maximum attribute number can be expressed as an
5067          * 8 bit quantity.
5068          */
5069         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5070 #endif
5071 
5072         if (vp == NULL) {
5073                 if (resp)
5074                         *resp = 0;
5075                 return (NFS4ERR_NOFILEHANDLE);
5076         }
5077         if (cs->access == CS_ACCESS_DENIED) {
5078                 if (resp)
5079                         *resp = 0;
5080                 return (NFS4ERR_ACCESS);
5081         }
5082 
5083         sargp->op = cmd;
5084         sargp->cs = cs;
5085         sargp->flag = 0;     /* may be set later */
5086         sargp->vap->va_mask = 0;
5087         sargp->rdattr_error = NFS4_OK;
5088         sargp->rdattr_error_req = FALSE;
5089         /* sargp->sbp is set by the caller */
5090 
5091         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5092 
5093         na = ntovp->na;
5094         amap = ntovp->amap;
5095 
5096         /*
5097          * The following loop iterates on the nfs4_ntov_map checking
5098          * if the fbit is set in the requested bitmap.
5099          * If set then we process the arguments using the
5100          * rfs4_fattr4 conversion functions to populate the setattr
5101          * vattr and va_mask. Any settable attrs that are not using vattr
5102          * will be set in this loop.
5103          */
5104         for (i = 0; i < nfs4_ntov_map_size; i++) {
5105                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5106                         continue;
5107                 }
5108                 /*
5109                  * If setattr, must be a writable attr.
5110                  * If verify/nverify, must be a readable attr.
5111                  */
5112                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5113                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5114                         /*
5115                          * Client tries to set/verify an
5116                          * unsupported attribute, tries to set
5117                          * a read only attr or verify a write
5118                          * only one - error!
5119                          */
5120                         break;
5121                 }
5122                 /*
5123                  * Decode the attribute to set/verify
5124                  */
5125                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5126                     &xdr, resp ? resp : NULL, na);
5127                 if (error)
5128                         break;
5129                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5130                 na++;
5131                 (ntovp->attrcnt)++;
5132                 if (nfs4_ntov_map[i].vfsstat)
5133                         ntovp->vfsstat = TRUE;
5134         }
5135 
5136         if (error != 0)
5137                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5138                     puterrno4(error));
5139         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5140         return (status);
5141 }
5142 
5143 static nfsstat4
5144 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5145     stateid4 *stateid)
5146 {
5147         int error = 0;
5148         struct nfs4_svgetit_arg sarg;
5149         bool_t trunc;
5150 
5151         nfsstat4 status = NFS4_OK;
5152         cred_t *cr = cs->cr;
5153         vnode_t *vp = cs->vp;
5154         struct nfs4_ntov_table ntov;
5155         struct statvfs64 sb;
5156         struct vattr bva;
5157         struct flock64 bf;
5158         int in_crit = 0;
5159         uint_t saved_mask = 0;
5160         caller_context_t ct;
5161 
5162         *resp = 0;
5163         sarg.sbp = &sb;
5164         sarg.is_referral = B_FALSE;
5165         nfs4_ntov_table_init(&ntov);
5166         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5167             NFS4ATTR_SETIT);
5168         if (status != NFS4_OK) {
5169                 /*
5170                  * failed set attrs
5171                  */
5172                 goto done;
5173         }
5174         if ((sarg.vap->va_mask == 0) &&
5175             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5176                 /*
5177                  * no further work to be done
5178                  */
5179                 goto done;
5180         }
5181 
5182         /*
5183          * If we got a request to set the ACL and the MODE, only
5184          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5185          * to change any other bits, along with setting an ACL,
5186          * gives NFS4ERR_INVAL.
5187          */
5188         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5189             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5190                 vattr_t va;
5191 
5192                 va.va_mask = AT_MODE;
5193                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5194                 if (error) {
5195                         status = puterrno4(error);
5196                         goto done;
5197                 }
5198                 if ((sarg.vap->va_mode ^ va.va_mode) &
5199                     ~(VSUID | VSGID | VSVTX)) {
5200                         status = NFS4ERR_INVAL;
5201                         goto done;
5202                 }
5203         }
5204 
5205         /* Check stateid only if size has been set */
5206         if (sarg.vap->va_mask & AT_SIZE) {
5207                 trunc = (sarg.vap->va_size == 0);
5208                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5209                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5210                 if (status != NFS4_OK)
5211                         goto done;
5212         } else {
5213                 ct.cc_sysid = 0;
5214                 ct.cc_pid = 0;
5215                 ct.cc_caller_id = nfs4_srv_caller_id;
5216                 ct.cc_flags = CC_DONTBLOCK;
5217         }
5218 
5219         /* XXX start of possible race with delegations */
5220 
5221         /*
5222          * We need to specially handle size changes because it is
5223          * possible for the client to create a file with read-only
5224          * modes, but with the file opened for writing. If the client
5225          * then tries to set the file size, e.g. ftruncate(3C),
5226          * fcntl(F_FREESP), the normal access checking done in
5227          * VOP_SETATTR would prevent the client from doing it even though
5228          * it should be allowed to do so.  To get around this, we do the
5229          * access checking for ourselves and use VOP_SPACE which doesn't
5230          * do the access checking.
5231          * Also the client should not be allowed to change the file
5232          * size if there is a conflicting non-blocking mandatory lock in
5233          * the region of the change.
5234          */
5235         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5236                 u_offset_t offset;
5237                 ssize_t length;
5238 
5239                 /*
5240                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5241                  * before returning, sarg.vap->va_mask is used to
5242                  * generate the setattr reply bitmap.  We also clear
5243                  * AT_SIZE below before calling VOP_SPACE.  For both
5244                  * of these cases, the va_mask needs to be saved here
5245                  * and restored after calling VOP_SETATTR.
5246                  */
5247                 saved_mask = sarg.vap->va_mask;
5248 
5249                 /*
5250                  * Check any possible conflict due to NBMAND locks.
5251                  * Get into critical region before VOP_GETATTR, so the
5252                  * size attribute is valid when checking conflicts.
5253                  */
5254                 if (nbl_need_check(vp)) {
5255                         nbl_start_crit(vp, RW_READER);
5256                         in_crit = 1;
5257                 }
5258 
5259                 bva.va_mask = AT_UID|AT_SIZE;
5260                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5261                         status = puterrno4(error);
5262                         goto done;
5263                 }
5264 
5265                 if (in_crit) {
5266                         if (sarg.vap->va_size < bva.va_size) {
5267                                 offset = sarg.vap->va_size;
5268                                 length = bva.va_size - sarg.vap->va_size;
5269                         } else {
5270                                 offset = bva.va_size;
5271                                 length = sarg.vap->va_size - bva.va_size;
5272                         }
5273                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5274                             &ct)) {
5275                                 status = NFS4ERR_LOCKED;
5276                                 goto done;
5277                         }
5278                 }
5279 
5280                 if (crgetuid(cr) == bva.va_uid) {
5281                         sarg.vap->va_mask &= ~AT_SIZE;
5282                         bf.l_type = F_WRLCK;
5283                         bf.l_whence = 0;
5284                         bf.l_start = (off64_t)sarg.vap->va_size;
5285                         bf.l_len = 0;
5286                         bf.l_sysid = 0;
5287                         bf.l_pid = 0;
5288                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5289                             (offset_t)sarg.vap->va_size, cr, &ct);
5290                 }
5291         }
5292 
5293         if (!error && sarg.vap->va_mask != 0)
5294                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5295 
5296         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5297         if (saved_mask & AT_SIZE)
5298                 sarg.vap->va_mask |= AT_SIZE;
5299 
5300         /*
5301          * If an ACL was being set, it has been delayed until now,
5302          * in order to set the mode (via the VOP_SETATTR() above) first.
5303          */
5304         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5305                 int i;
5306 
5307                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5308                         if (ntov.amap[i] == FATTR4_ACL)
5309                                 break;
5310                 if (i < NFS4_MAXNUM_ATTRS) {
5311                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5312                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5313                         if (error == 0) {
5314                                 *resp |= FATTR4_ACL_MASK;
5315                         } else if (error == ENOTSUP) {
5316                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5317                                 status = NFS4ERR_ATTRNOTSUPP;
5318                                 goto done;
5319                         }
5320                 } else {
5321                         NFS4_DEBUG(rfs4_debug,
5322                             (CE_NOTE, "do_rfs4_op_setattr: "
5323                             "unable to find ACL in fattr4"));
5324                         error = EINVAL;
5325                 }
5326         }
5327 
5328         if (error) {
5329                 /* check if a monitor detected a delegation conflict */
5330                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5331                         status = NFS4ERR_DELAY;
5332                 else
5333                         status = puterrno4(error);
5334 
5335                 /*
5336                  * Set the response bitmap when setattr failed.
5337                  * If VOP_SETATTR partially succeeded, test by doing a
5338                  * VOP_GETATTR on the object and comparing the data
5339                  * to the setattr arguments.
5340                  */
5341                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5342         } else {
5343                 /*
5344                  * Force modified metadata out to stable storage.
5345                  */
5346                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5347                 /*
5348                  * Set response bitmap
5349                  */
5350                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5351         }
5352 
5353 /* Return early and already have a NFSv4 error */
5354 done:
5355         /*
5356          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5357          * conversion sets both readable and writeable NFS4 attrs
5358          * for AT_MTIME and AT_ATIME.  The line below masks out
5359          * unrequested attrs from the setattr result bitmap.  This
5360          * is placed after the done: label to catch the ATTRNOTSUP
5361          * case.
5362          */
5363         *resp &= fattrp->attrmask;
5364 
5365         if (in_crit)
5366                 nbl_end_crit(vp);
5367 
5368         nfs4_ntov_table_free(&ntov, &sarg);
5369 
5370         return (status);
5371 }
5372 
5373 /* ARGSUSED */
5374 static void
5375 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5376     struct compound_state *cs)
5377 {
5378         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5379         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5380         bslabel_t *clabel;
5381 
5382         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5383             SETATTR4args *, args);
5384 
5385         if (cs->vp == NULL) {
5386                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5387                 goto out;
5388         }
5389 
5390         /*
5391          * If there is an unshared filesystem mounted on this vnode,
5392          * do not allow to setattr on this vnode.
5393          */
5394         if (vn_ismntpt(cs->vp)) {
5395                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5396                 goto out;
5397         }
5398 
5399         resp->attrsset = 0;
5400 
5401         if (rdonly4(req, cs)) {
5402                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5403                 goto out;
5404         }
5405 
5406         /* check label before setting attributes */
5407         if (is_system_labeled()) {
5408                 ASSERT(req->rq_label != NULL);
5409                 clabel = req->rq_label;
5410                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5411                     "got client label from request(1)",
5412                     struct svc_req *, req);
5413                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5414                         if (!do_rfs_label_check(clabel, cs->vp,
5415                             EQUALITY_CHECK, cs->exi)) {
5416                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5417                                 goto out;
5418                         }
5419                 }
5420         }
5421 
5422         *cs->statusp = resp->status =
5423             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5424             &args->stateid);
5425 
5426 out:
5427         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5428             SETATTR4res *, resp);
5429 }
5430 
5431 /* ARGSUSED */
5432 static void
5433 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5434     struct compound_state *cs)
5435 {
5436         /*
5437          * verify and nverify are exactly the same, except that nverify
5438          * succeeds when some argument changed, and verify succeeds when
5439          * when none changed.
5440          */
5441 
5442         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5443         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5444 
5445         int error;
5446         struct nfs4_svgetit_arg sarg;
5447         struct statvfs64 sb;
5448         struct nfs4_ntov_table ntov;
5449 
5450         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5451             VERIFY4args *, args);
5452 
5453         if (cs->vp == NULL) {
5454                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5455                 goto out;
5456         }
5457 
5458         sarg.sbp = &sb;
5459         sarg.is_referral = B_FALSE;
5460         nfs4_ntov_table_init(&ntov);
5461         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5462             &sarg, &ntov, NFS4ATTR_VERIT);
5463         if (resp->status != NFS4_OK) {
5464                 /*
5465                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5466                  * so could return -1 for "no match".
5467                  */
5468                 if (resp->status == -1)
5469                         resp->status = NFS4ERR_NOT_SAME;
5470                 goto done;
5471         }
5472         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5473         switch (error) {
5474         case 0:
5475                 resp->status = NFS4_OK;
5476                 break;
5477         case -1:
5478                 resp->status = NFS4ERR_NOT_SAME;
5479                 break;
5480         default:
5481                 resp->status = puterrno4(error);
5482                 break;
5483         }
5484 done:
5485         *cs->statusp = resp->status;
5486         nfs4_ntov_table_free(&ntov, &sarg);
5487 out:
5488         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5489             VERIFY4res *, resp);
5490 }
5491 
5492 /* ARGSUSED */
5493 static void
5494 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5495     struct compound_state *cs)
5496 {
5497         /*
5498          * verify and nverify are exactly the same, except that nverify
5499          * succeeds when some argument changed, and verify succeeds when
5500          * when none changed.
5501          */
5502 
5503         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5504         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5505 
5506         int error;
5507         struct nfs4_svgetit_arg sarg;
5508         struct statvfs64 sb;
5509         struct nfs4_ntov_table ntov;
5510 
5511         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5512             NVERIFY4args *, args);
5513 
5514         if (cs->vp == NULL) {
5515                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5516                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5517                     NVERIFY4res *, resp);
5518                 return;
5519         }
5520         sarg.sbp = &sb;
5521         sarg.is_referral = B_FALSE;
5522         nfs4_ntov_table_init(&ntov);
5523         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5524             &sarg, &ntov, NFS4ATTR_VERIT);
5525         if (resp->status != NFS4_OK) {
5526                 /*
5527                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5528                  * so could return -1 for "no match".
5529                  */
5530                 if (resp->status == -1)
5531                         resp->status = NFS4_OK;
5532                 goto done;
5533         }
5534         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5535         switch (error) {
5536         case 0:
5537                 resp->status = NFS4ERR_SAME;
5538                 break;
5539         case -1:
5540                 resp->status = NFS4_OK;
5541                 break;
5542         default:
5543                 resp->status = puterrno4(error);
5544                 break;
5545         }
5546 done:
5547         *cs->statusp = resp->status;
5548         nfs4_ntov_table_free(&ntov, &sarg);
5549 
5550         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5551             NVERIFY4res *, resp);
5552 }
5553 
5554 /*
5555  * XXX - This should live in an NFS header file.
5556  */
5557 #define MAX_IOVECS      12
5558 
5559 /* ARGSUSED */
5560 static void
5561 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5562     struct compound_state *cs)
5563 {
5564         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5565         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5566         int error;
5567         vnode_t *vp;
5568         struct vattr bva;
5569         u_offset_t rlimit;
5570         struct uio uio;
5571         struct iovec iov[MAX_IOVECS];
5572         struct iovec *iovp;
5573         int iovcnt;
5574         int ioflag;
5575         cred_t *savecred, *cr;
5576         bool_t *deleg = &cs->deleg;
5577         nfsstat4 stat;
5578         int in_crit = 0;
5579         caller_context_t ct;
5580         nfs4_srv_t *nsrv4;
5581 
5582         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5583             WRITE4args *, args);
5584 
5585         vp = cs->vp;
5586         if (vp == NULL) {
5587                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5588                 goto out;
5589         }
5590         if (cs->access == CS_ACCESS_DENIED) {
5591                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5592                 goto out;
5593         }
5594 
5595         cr = cs->cr;
5596 
5597         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5598             deleg, TRUE, &ct)) != NFS4_OK) {
5599                 *cs->statusp = resp->status = stat;
5600                 goto out;
5601         }
5602 
5603         /*
5604          * We have to enter the critical region before calling VOP_RWLOCK
5605          * to avoid a deadlock with ufs.
5606          */
5607         if (nbl_need_check(vp)) {
5608                 nbl_start_crit(vp, RW_READER);
5609                 in_crit = 1;
5610                 if (nbl_conflict(vp, NBL_WRITE,
5611                     args->offset, args->data_len, 0, &ct)) {
5612                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5613                         goto out;
5614                 }
5615         }
5616 
5617         bva.va_mask = AT_MODE | AT_UID;
5618         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5619 
5620         /*
5621          * If we can't get the attributes, then we can't do the
5622          * right access checking.  So, we'll fail the request.
5623          */
5624         if (error) {
5625                 *cs->statusp = resp->status = puterrno4(error);
5626                 goto out;
5627         }
5628 
5629         if (rdonly4(req, cs)) {
5630                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5631                 goto out;
5632         }
5633 
5634         if (vp->v_type != VREG) {
5635                 *cs->statusp = resp->status =
5636                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5637                 goto out;
5638         }
5639 
5640         if (crgetuid(cr) != bva.va_uid &&
5641             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5642                 *cs->statusp = resp->status = puterrno4(error);
5643                 goto out;
5644         }
5645 
5646         if (MANDLOCK(vp, bva.va_mode)) {
5647                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5648                 goto out;
5649         }
5650 
5651         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5652         if (args->data_len == 0) {
5653                 *cs->statusp = resp->status = NFS4_OK;
5654                 resp->count = 0;
5655                 resp->committed = args->stable;
5656                 resp->writeverf = nsrv4->write4verf;
5657                 goto out;
5658         }
5659 
5660         if (args->mblk != NULL) {
5661                 mblk_t *m;
5662                 uint_t bytes, round_len;
5663 
5664                 iovcnt = 0;
5665                 bytes = 0;
5666                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5667                 for (m = args->mblk;
5668                     m != NULL && bytes < round_len;
5669                     m = m->b_cont) {
5670                         iovcnt++;
5671                         bytes += MBLKL(m);
5672                 }
5673 #ifdef DEBUG
5674                 /* should have ended on an mblk boundary */
5675                 if (bytes != round_len) {
5676                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5677                             bytes, round_len, args->data_len);
5678                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5679                             (void *)args->mblk, (void *)m);
5680                         ASSERT(bytes == round_len);
5681                 }
5682 #endif
5683                 if (iovcnt <= MAX_IOVECS) {
5684                         iovp = iov;
5685                 } else {
5686                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5687                 }
5688                 mblk_to_iov(args->mblk, iovcnt, iovp);
5689         } else if (args->rlist != NULL) {
5690                 iovcnt = 1;
5691                 iovp = iov;
5692                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5693                 iovp->iov_len = args->data_len;
5694         } else {
5695                 iovcnt = 1;
5696                 iovp = iov;
5697                 iovp->iov_base = args->data_val;
5698                 iovp->iov_len = args->data_len;
5699         }
5700 
5701         uio.uio_iov = iovp;
5702         uio.uio_iovcnt = iovcnt;
5703 
5704         uio.uio_segflg = UIO_SYSSPACE;
5705         uio.uio_extflg = UIO_COPY_DEFAULT;
5706         uio.uio_loffset = args->offset;
5707         uio.uio_resid = args->data_len;
5708         uio.uio_llimit = curproc->p_fsz_ctl;
5709         rlimit = uio.uio_llimit - args->offset;
5710         if (rlimit < (u_offset_t)uio.uio_resid)
5711                 uio.uio_resid = (int)rlimit;
5712 
5713         if (args->stable == UNSTABLE4)
5714                 ioflag = 0;
5715         else if (args->stable == FILE_SYNC4)
5716                 ioflag = FSYNC;
5717         else if (args->stable == DATA_SYNC4)
5718                 ioflag = FDSYNC;
5719         else {
5720                 if (iovp != iov)
5721                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5722                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5723                 goto out;
5724         }
5725 
5726         /*
5727          * We're changing creds because VM may fault and we need
5728          * the cred of the current thread to be used if quota
5729          * checking is enabled.
5730          */
5731         savecred = curthread->t_cred;
5732         curthread->t_cred = cr;
5733         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5734         curthread->t_cred = savecred;
5735 
5736         if (iovp != iov)
5737                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5738 
5739         if (error) {
5740                 *cs->statusp = resp->status = puterrno4(error);
5741                 goto out;
5742         }
5743 
5744         *cs->statusp = resp->status = NFS4_OK;
5745         resp->count = args->data_len - uio.uio_resid;
5746 
5747         if (ioflag == 0)
5748                 resp->committed = UNSTABLE4;
5749         else
5750                 resp->committed = FILE_SYNC4;
5751 
5752         resp->writeverf = nsrv4->write4verf;
5753 
5754 out:
5755         if (in_crit)
5756                 nbl_end_crit(vp);
5757 
5758         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5759             WRITE4res *, resp);
5760 }
5761 
5762 
5763 /* XXX put in a header file */
5764 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5765 
5766 void
5767 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5768     struct svc_req *req, cred_t *cr, int *rv)
5769 {
5770         uint_t i;
5771         struct compound_state cs;
5772         nfs4_srv_t *nsrv4;
5773         nfs_export_t *ne = nfs_get_export();
5774 
5775         if (rv != NULL)
5776                 *rv = 0;
5777         rfs4_init_compound_state(&cs);
5778         /*
5779          * Form a reply tag by copying over the reqeuest tag.
5780          */
5781         resp->tag.utf8string_val =
5782             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5783         resp->tag.utf8string_len = args->tag.utf8string_len;
5784         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5785             resp->tag.utf8string_len);
5786 
5787         cs.statusp = &resp->status;
5788         cs.req = req;
5789         resp->array = NULL;
5790         resp->array_len = 0;
5791 
5792         /*
5793          * XXX for now, minorversion should be zero
5794          */
5795         if (args->minorversion != NFS4_MINORVERSION) {
5796                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5797                     &cs, COMPOUND4args *, args);
5798                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5799                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5800                     &cs, COMPOUND4res *, resp);
5801                 return;
5802         }
5803 
5804         if (args->array_len == 0) {
5805                 resp->status = NFS4_OK;
5806                 return;
5807         }
5808 
5809         ASSERT(exi == NULL);
5810         ASSERT(cr == NULL);
5811 
5812         cr = crget();
5813         ASSERT(cr != NULL);
5814 
5815         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5816                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5817                     &cs, COMPOUND4args *, args);
5818                 crfree(cr);
5819                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5820                     &cs, COMPOUND4res *, resp);
5821                 svcerr_badcred(req->rq_xprt);
5822                 if (rv != NULL)
5823                         *rv = 1;
5824                 return;
5825         }
5826         resp->array_len = args->array_len;
5827         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5828             KM_SLEEP);
5829 
5830         cs.basecr = cr;
5831         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5832 
5833         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5834             COMPOUND4args *, args);
5835 
5836         /*
5837          * For now, NFS4 compound processing must be protected by
5838          * exported_lock because it can access more than one exportinfo
5839          * per compound and share/unshare can now change multiple
5840          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5841          * per proc (excluding public exinfo), and exi_count design
5842          * is sufficient to protect concurrent execution of NFS2/3
5843          * ops along with unexport.  This lock will be removed as
5844          * part of the NFSv4 phase 2 namespace redesign work.
5845          */
5846         rw_enter(&ne->exported_lock, RW_READER);
5847 
5848         /*
5849          * If this is the first compound we've seen, we need to start all
5850          * new instances' grace periods.
5851          */
5852         if (nsrv4->seen_first_compound == 0) {
5853                 rfs4_grace_start_new(nsrv4);
5854                 /*
5855                  * This must be set after rfs4_grace_start_new(), otherwise
5856                  * another thread could proceed past here before the former
5857                  * is finished.
5858                  */
5859                 nsrv4->seen_first_compound = 1;
5860         }
5861 
5862         for (i = 0; i < args->array_len && cs.cont; i++) {
5863                 nfs_argop4 *argop;
5864                 nfs_resop4 *resop;
5865                 uint_t op;
5866 
5867                 argop = &args->array[i];
5868                 resop = &resp->array[i];
5869                 resop->resop = argop->argop;
5870                 op = (uint_t)resop->resop;
5871 
5872                 if (op < rfsv4disp_cnt) {
5873                         /*
5874                          * Count the individual ops here; NULL and COMPOUND
5875                          * are counted in common_dispatch()
5876                          */
5877                         rfsproccnt_v4_ptr[op].value.ui64++;
5878 
5879                         NFS4_DEBUG(rfs4_debug > 1,
5880                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5881                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5882                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5883                             rfs4_op_string[op], *cs.statusp));
5884                         if (*cs.statusp != NFS4_OK)
5885                                 cs.cont = FALSE;
5886                 } else {
5887                         /*
5888                          * This is effectively dead code since XDR code
5889                          * will have already returned BADXDR if op doesn't
5890                          * decode to legal value.  This only done for a
5891                          * day when XDR code doesn't verify v4 opcodes.
5892                          */
5893                         op = OP_ILLEGAL;
5894                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5895 
5896                         rfs4_op_illegal(argop, resop, req, &cs);
5897                         cs.cont = FALSE;
5898                 }
5899 
5900                 /*
5901                  * If not at last op, and if we are to stop, then
5902                  * compact the results array.
5903                  */
5904                 if ((i + 1) < args->array_len && !cs.cont) {
5905                         nfs_resop4 *new_res = kmem_alloc(
5906                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5907                         bcopy(resp->array,
5908                             new_res, (i+1) * sizeof (nfs_resop4));
5909                         kmem_free(resp->array,
5910                             args->array_len * sizeof (nfs_resop4));
5911 
5912                         resp->array_len =  i + 1;
5913                         resp->array = new_res;
5914                 }
5915         }
5916 
5917         rw_exit(&ne->exported_lock);
5918 
5919         /*
5920          * clear exportinfo and vnode fields from compound_state before dtrace
5921          * probe, to avoid tracing residual values for path and share path.
5922          */
5923         if (cs.vp)
5924                 VN_RELE(cs.vp);
5925         if (cs.saved_vp)
5926                 VN_RELE(cs.saved_vp);
5927         cs.exi = cs.saved_exi = NULL;
5928         cs.vp = cs.saved_vp = NULL;
5929 
5930         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5931             COMPOUND4res *, resp);
5932 
5933         if (cs.saved_fh.nfs_fh4_val)
5934                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5935 
5936         if (cs.basecr)
5937                 crfree(cs.basecr);
5938         if (cs.cr)
5939                 crfree(cs.cr);
5940         /*
5941          * done with this compound request, free the label
5942          */
5943 
5944         if (req->rq_label != NULL) {
5945                 kmem_free(req->rq_label, sizeof (bslabel_t));
5946                 req->rq_label = NULL;
5947         }
5948 }
5949 
5950 /*
5951  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5952  * XXX zero out the tag and array values. Need to investigate why the
5953  * XXX calls occur, but at least prevent the panic for now.
5954  */
5955 void
5956 rfs4_compound_free(COMPOUND4res *resp)
5957 {
5958         uint_t i;
5959 
5960         if (resp->tag.utf8string_val) {
5961                 UTF8STRING_FREE(resp->tag)
5962         }
5963 
5964         for (i = 0; i < resp->array_len; i++) {
5965                 nfs_resop4 *resop;
5966                 uint_t op;
5967 
5968                 resop = &resp->array[i];
5969                 op = (uint_t)resop->resop;
5970                 if (op < rfsv4disp_cnt) {
5971                         (*rfsv4disptab[op].dis_resfree)(resop);
5972                 }
5973         }
5974         if (resp->array != NULL) {
5975                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5976         }
5977 }
5978 
5979 /*
5980  * Process the value of the compound request rpc flags, as a bit-AND
5981  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5982  */
5983 void
5984 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5985 {
5986         int i;
5987         int flag = RPC_ALL;
5988 
5989         for (i = 0; flag && i < args->array_len; i++) {
5990                 uint_t op;
5991 
5992                 op = (uint_t)args->array[i].argop;
5993 
5994                 if (op < rfsv4disp_cnt)
5995                         flag &= rfsv4disptab[op].dis_flags;
5996                 else
5997                         flag = 0;
5998         }
5999         *flagp = flag;
6000 }
6001 
6002 nfsstat4
6003 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6004 {
6005         nfsstat4 e;
6006 
6007         rfs4_dbe_lock(cp->rc_dbe);
6008 
6009         if (cp->rc_sysidt != LM_NOSYSID) {
6010                 *sp = cp->rc_sysidt;
6011                 e = NFS4_OK;
6012 
6013         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6014                 *sp = cp->rc_sysidt;
6015                 e = NFS4_OK;
6016 
6017                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6018                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6019         } else
6020                 e = NFS4ERR_DELAY;
6021 
6022         rfs4_dbe_unlock(cp->rc_dbe);
6023         return (e);
6024 }
6025 
#if defined(DEBUG) && ! defined(lint)
/*
 * lock_print() - debug-only helper that logs one record lock request.
 *
 * str is a caller-supplied prefix, operation is the fcntl-style command
 * (F_GETLK, F_SETLK or F_SETLK_NBMAND; anything else is shown as
 * F_UNKNOWN) and flk describes the lock.  A zero l_len is printed as
 * all ones.  l_whence is asserted to be 0.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op, *type;

        switch (operation) {
        case F_GETLK:
                op = "F_GETLK";
                break;
        case F_SETLK:
                op = "F_SETLK";
                break;
        case F_SETLK_NBMAND:
                op = "F_SETLK_NBMAND";
                break;
        default:
                op = "F_UNKNOWN";
                break;
        }

        switch (flk->l_type) {
        case F_UNLCK:
                type = "F_UNLCK";
                break;
        case F_RDLCK:
                type = "F_RDLCK";
                break;
        case F_WRLCK:
                type = "F_WRLCK";
                break;
        default:
                type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Call lock_print(s, t, f) when d is non-zero.  The body is wrapped in
 * do/while (0) so the macro behaves as a single statement and cannot
 * capture an "else" that follows it at the call site.
 */
#define LOCK_PRINT(d, s, t, f) \
        do { \
                if (d) \
                        lock_print(s, t, f); \
        } while (0)
#else
/* Non-DEBUG (or lint) builds: lock tracing compiles away to nothing. */
#define LOCK_PRINT(d, s, t, f)
#endif
6062 
6063 /*ARGSUSED*/
6064 static bool_t
6065 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6066 {
6067         return (TRUE);
6068 }
6069 
6070 /*
6071  * Look up the pathname using the vp in cs as the directory vnode.
6072  * cs->vp will be the vnode for the file on success
6073  */
6074 
6075 static nfsstat4
6076 rfs4_lookup(component4 *component, struct svc_req *req,
6077     struct compound_state *cs)
6078 {
6079         char *nm;
6080         uint32_t len;
6081         nfsstat4 status;
6082         struct sockaddr *ca;
6083         char *name;
6084 
6085         if (cs->vp == NULL) {
6086                 return (NFS4ERR_NOFILEHANDLE);
6087         }
6088         if (cs->vp->v_type != VDIR) {
6089                 return (NFS4ERR_NOTDIR);
6090         }
6091 
6092         status = utf8_dir_verify(component);
6093         if (status != NFS4_OK)
6094                 return (status);
6095 
6096         nm = utf8_to_fn(component, &len, NULL);
6097         if (nm == NULL) {
6098                 return (NFS4ERR_INVAL);
6099         }
6100 
6101         if (len > MAXNAMELEN) {
6102                 kmem_free(nm, len);
6103                 return (NFS4ERR_NAMETOOLONG);
6104         }
6105 
6106         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6107         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6108             MAXPATHLEN + 1);
6109 
6110         if (name == NULL) {
6111                 kmem_free(nm, len);
6112                 return (NFS4ERR_INVAL);
6113         }
6114 
6115         status = do_rfs4_op_lookup(name, req, cs);
6116 
6117         if (name != nm)
6118                 kmem_free(name, MAXPATHLEN + 1);
6119 
6120         kmem_free(nm, len);
6121 
6122         return (status);
6123 }
6124 
6125 static nfsstat4
6126 rfs4_lookupfile(component4 *component, struct svc_req *req,
6127     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6128 {
6129         nfsstat4 status;
6130         vnode_t *dvp = cs->vp;
6131         vattr_t bva, ava, fva;
6132         int error;
6133 
6134         /* Get "before" change value */
6135         bva.va_mask = AT_CTIME|AT_SEQ;
6136         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6137         if (error)
6138                 return (puterrno4(error));
6139 
6140         /* rfs4_lookup may VN_RELE directory */
6141         VN_HOLD(dvp);
6142 
6143         status = rfs4_lookup(component, req, cs);
6144         if (status != NFS4_OK) {
6145                 VN_RELE(dvp);
6146                 return (status);
6147         }
6148 
6149         /*
6150          * Get "after" change value, if it fails, simply return the
6151          * before value.
6152          */
6153         ava.va_mask = AT_CTIME|AT_SEQ;
6154         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6155                 ava.va_ctime = bva.va_ctime;
6156                 ava.va_seq = 0;
6157         }
6158         VN_RELE(dvp);
6159 
6160         /*
6161          * Validate the file is a file
6162          */
6163         fva.va_mask = AT_TYPE|AT_MODE;
6164         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6165         if (error)
6166                 return (puterrno4(error));
6167 
6168         if (fva.va_type != VREG) {
6169                 if (fva.va_type == VDIR)
6170                         return (NFS4ERR_ISDIR);
6171                 if (fva.va_type == VLNK)
6172                         return (NFS4ERR_SYMLINK);
6173                 return (NFS4ERR_INVAL);
6174         }
6175 
6176         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6177         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6178 
6179         /*
6180          * It is undefined if VOP_LOOKUP will change va_seq, so
6181          * cinfo.atomic = TRUE only if we have
6182          * non-zero va_seq's, and they have not changed.
6183          */
6184         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6185                 cinfo->atomic = TRUE;
6186         else
6187                 cinfo->atomic = FALSE;
6188 
6189         /* Check for mandatory locking */
6190         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6191         return (check_open_access(access, cs, req));
6192 }
6193 
6194 static nfsstat4
6195 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6196     cred_t *cr, vnode_t **vpp, bool_t *created)
6197 {
6198         int error;
6199         nfsstat4 status = NFS4_OK;
6200         vattr_t va;
6201 
6202 tryagain:
6203 
6204         /*
6205          * The file open mode used is VWRITE.  If the client needs
6206          * some other semantic, then it should do the access checking
6207          * itself.  It would have been nice to have the file open mode
6208          * passed as part of the arguments.
6209          */
6210 
6211         *created = TRUE;
6212         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6213 
6214         if (error) {
6215                 *created = FALSE;
6216 
6217                 /*
6218                  * If we got something other than file already exists
6219                  * then just return this error.  Otherwise, we got
6220                  * EEXIST.  If we were doing a GUARDED create, then
6221                  * just return this error.  Otherwise, we need to
6222                  * make sure that this wasn't a duplicate of an
6223                  * exclusive create request.
6224                  *
6225                  * The assumption is made that a non-exclusive create
6226                  * request will never return EEXIST.
6227                  */
6228 
6229                 if (error != EEXIST || mode == GUARDED4) {
6230                         status = puterrno4(error);
6231                         return (status);
6232                 }
6233                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6234                     NULL, NULL, NULL);
6235 
6236                 if (error) {
6237                         /*
6238                          * We couldn't find the file that we thought that
6239                          * we just created.  So, we'll just try creating
6240                          * it again.
6241                          */
6242                         if (error == ENOENT)
6243                                 goto tryagain;
6244 
6245                         status = puterrno4(error);
6246                         return (status);
6247                 }
6248 
6249                 if (mode == UNCHECKED4) {
6250                         /* existing object must be regular file */
6251                         if ((*vpp)->v_type != VREG) {
6252                                 if ((*vpp)->v_type == VDIR)
6253                                         status = NFS4ERR_ISDIR;
6254                                 else if ((*vpp)->v_type == VLNK)
6255                                         status = NFS4ERR_SYMLINK;
6256                                 else
6257                                         status = NFS4ERR_INVAL;
6258                                 VN_RELE(*vpp);
6259                                 return (status);
6260                         }
6261 
6262                         return (NFS4_OK);
6263                 }
6264 
6265                 /* Check for duplicate request */
6266                 va.va_mask = AT_MTIME;
6267                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6268                 if (!error) {
6269                         /* We found the file */
6270                         const timestruc_t *mtime = &vap->va_mtime;
6271 
6272                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6273                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6274                                 /* but its not our creation */
6275                                 VN_RELE(*vpp);
6276                                 return (NFS4ERR_EXIST);
6277                         }
6278                         *created = TRUE; /* retrans of create == created */
6279                         return (NFS4_OK);
6280                 }
6281                 VN_RELE(*vpp);
6282                 return (NFS4ERR_EXIST);
6283         }
6284 
6285         return (NFS4_OK);
6286 }
6287 
6288 static nfsstat4
6289 check_open_access(uint32_t access, struct compound_state *cs,
6290     struct svc_req *req)
6291 {
6292         int error;
6293         vnode_t *vp;
6294         bool_t readonly;
6295         cred_t *cr = cs->cr;
6296 
6297         /* For now we don't allow mandatory locking as per V2/V3 */
6298         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6299                 return (NFS4ERR_ACCESS);
6300         }
6301 
6302         vp = cs->vp;
6303         ASSERT(cr != NULL && vp->v_type == VREG);
6304 
6305         /*
6306          * If the file system is exported read only and we are trying
6307          * to open for write, then return NFS4ERR_ROFS
6308          */
6309 
6310         readonly = rdonly4(req, cs);
6311 
6312         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6313                 return (NFS4ERR_ROFS);
6314 
6315         if (access & OPEN4_SHARE_ACCESS_READ) {
6316                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6317                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6318                         return (NFS4ERR_ACCESS);
6319                 }
6320         }
6321 
6322         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6323                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6324                 if (error)
6325                         return (NFS4ERR_ACCESS);
6326         }
6327 
6328         return (NFS4_OK);
6329 }
6330 
6331 static nfsstat4
6332 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6333     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6334 {
6335         struct nfs4_svgetit_arg sarg;
6336         struct nfs4_ntov_table ntov;
6337 
6338         bool_t ntov_table_init = FALSE;
6339         struct statvfs64 sb;
6340         nfsstat4 status;
6341         vnode_t *vp;
6342         vattr_t bva, ava, iva, cva, *vap;
6343         vnode_t *dvp;
6344         timespec32_t *mtime;
6345         char *nm = NULL;
6346         uint_t buflen;
6347         bool_t created;
6348         bool_t setsize = FALSE;
6349         len_t reqsize;
6350         int error;
6351         bool_t trunc;
6352         caller_context_t ct;
6353         component4 *component;
6354         bslabel_t *clabel;
6355         struct sockaddr *ca;
6356         char *name = NULL;
6357 
6358         sarg.sbp = &sb;
6359         sarg.is_referral = B_FALSE;
6360 
6361         dvp = cs->vp;
6362 
6363         /* Check if the file system is read only */
6364         if (rdonly4(req, cs))
6365                 return (NFS4ERR_ROFS);
6366 
6367         /* check the label of including directory */
6368         if (is_system_labeled()) {
6369                 ASSERT(req->rq_label != NULL);
6370                 clabel = req->rq_label;
6371                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6372                     "got client label from request(1)",
6373                     struct svc_req *, req);
6374                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6375                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6376                             cs->exi)) {
6377                                 return (NFS4ERR_ACCESS);
6378                         }
6379                 }
6380         }
6381 
6382         /*
6383          * Get the last component of path name in nm. cs will reference
6384          * the including directory on success.
6385          */
6386         component = &args->open_claim4_u.file;
6387         status = utf8_dir_verify(component);
6388         if (status != NFS4_OK)
6389                 return (status);
6390 
6391         nm = utf8_to_fn(component, &buflen, NULL);
6392 
6393         if (nm == NULL)
6394                 return (NFS4ERR_RESOURCE);
6395 
6396         if (buflen > MAXNAMELEN) {
6397                 kmem_free(nm, buflen);
6398                 return (NFS4ERR_NAMETOOLONG);
6399         }
6400 
6401         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6402         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6403         if (error) {
6404                 kmem_free(nm, buflen);
6405                 return (puterrno4(error));
6406         }
6407 
6408         if (bva.va_type != VDIR) {
6409                 kmem_free(nm, buflen);
6410                 return (NFS4ERR_NOTDIR);
6411         }
6412 
6413         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6414 
6415         switch (args->mode) {
6416         case GUARDED4:
6417                 /*FALLTHROUGH*/
6418         case UNCHECKED4:
6419                 nfs4_ntov_table_init(&ntov);
6420                 ntov_table_init = TRUE;
6421 
6422                 *attrset = 0;
6423                 status = do_rfs4_set_attrs(attrset,
6424                     &args->createhow4_u.createattrs,
6425                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6426 
6427                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6428                     sarg.vap->va_type != VREG) {
6429                         if (sarg.vap->va_type == VDIR)
6430                                 status = NFS4ERR_ISDIR;
6431                         else if (sarg.vap->va_type == VLNK)
6432                                 status = NFS4ERR_SYMLINK;
6433                         else
6434                                 status = NFS4ERR_INVAL;
6435                 }
6436 
6437                 if (status != NFS4_OK) {
6438                         kmem_free(nm, buflen);
6439                         nfs4_ntov_table_free(&ntov, &sarg);
6440                         *attrset = 0;
6441                         return (status);
6442                 }
6443 
6444                 vap = sarg.vap;
6445                 vap->va_type = VREG;
6446                 vap->va_mask |= AT_TYPE;
6447 
6448                 if ((vap->va_mask & AT_MODE) == 0) {
6449                         vap->va_mask |= AT_MODE;
6450                         vap->va_mode = (mode_t)0600;
6451                 }
6452 
6453                 if (vap->va_mask & AT_SIZE) {
6454 
6455                         /* Disallow create with a non-zero size */
6456 
6457                         if ((reqsize = sarg.vap->va_size) != 0) {
6458                                 kmem_free(nm, buflen);
6459                                 nfs4_ntov_table_free(&ntov, &sarg);
6460                                 *attrset = 0;
6461                                 return (NFS4ERR_INVAL);
6462                         }
6463                         setsize = TRUE;
6464                 }
6465                 break;
6466 
6467         case EXCLUSIVE4:
6468                 /* prohibit EXCL create of named attributes */
6469                 if (dvp->v_flag & V_XATTRDIR) {
6470                         kmem_free(nm, buflen);
6471                         *attrset = 0;
6472                         return (NFS4ERR_INVAL);
6473                 }
6474 
6475                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6476                 cva.va_type = VREG;
6477                 /*
6478                  * Ensure no time overflows. Assumes underlying
6479                  * filesystem supports at least 32 bits.
6480                  * Truncate nsec to usec resolution to allow valid
6481                  * compares even if the underlying filesystem truncates.
6482                  */
6483                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6484                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6485                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6486                 cva.va_mode = (mode_t)0;
6487                 vap = &cva;
6488 
6489                 /*
6490                  * For EXCL create, attrset is set to the server attr
6491                  * used to cache the client's verifier.
6492                  */
6493                 *attrset = FATTR4_TIME_MODIFY_MASK;
6494                 break;
6495         }
6496 
6497         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6498         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6499             MAXPATHLEN  + 1);
6500 
6501         if (name == NULL) {
6502                 kmem_free(nm, buflen);
6503                 return (NFS4ERR_SERVERFAULT);
6504         }
6505 
6506         status = create_vnode(dvp, name, vap, args->mode,
6507             cs->cr, &vp, &created);
6508         if (nm != name)
6509                 kmem_free(name, MAXPATHLEN + 1);
6510         kmem_free(nm, buflen);
6511 
6512         if (status != NFS4_OK) {
6513                 if (ntov_table_init)
6514                         nfs4_ntov_table_free(&ntov, &sarg);
6515                 *attrset = 0;
6516                 return (status);
6517         }
6518 
6519         trunc = (setsize && !created);
6520 
6521         if (args->mode != EXCLUSIVE4) {
6522                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6523 
6524                 /*
6525                  * True verification that object was created with correct
6526                  * attrs is impossible.  The attrs could have been changed
6527                  * immediately after object creation.  If attributes did
6528                  * not verify, the only recourse for the server is to
6529                  * destroy the object.  Maybe if some attrs (like gid)
6530                  * are set incorrectly, the object should be destroyed;
6531                  * however, seems bad as a default policy.  Do we really
6532                  * want to destroy an object over one of the times not
6533                  * verifying correctly?  For these reasons, the server
6534                  * currently sets bits in attrset for createattrs
6535                  * that were set; however, no verification is done.
6536                  *
6537                  * vmask_to_nmask accounts for vattr bits set on create
6538                  *      [do_rfs4_set_attrs() only sets resp bits for
6539                  *       non-vattr/vfs bits.]
6540                  * Mask off any bits we set by default so as not to return
6541                  * more attrset bits than were requested in createattrs
6542                  */
6543                 if (created) {
6544                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6545                         *attrset &= createmask;
6546                 } else {
6547                         /*
6548                          * We did not create the vnode (we tried but it
6549                          * already existed).  In this case, the only createattr
6550                          * that the spec allows the server to set is size,
6551                          * and even then, it can only be set if it is 0.
6552                          */
6553                         *attrset = 0;
6554                         if (trunc)
6555                                 *attrset = FATTR4_SIZE_MASK;
6556                 }
6557         }
6558         if (ntov_table_init)
6559                 nfs4_ntov_table_free(&ntov, &sarg);
6560 
6561         /*
6562          * Get the initial "after" sequence number, if it fails,
6563          * set to zero, time to before.
6564          */
6565         iva.va_mask = AT_CTIME|AT_SEQ;
6566         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6567                 iva.va_seq = 0;
6568                 iva.va_ctime = bva.va_ctime;
6569         }
6570 
6571         /*
6572          * create_vnode attempts to create the file exclusive,
6573          * if it already exists the VOP_CREATE will fail and
6574          * may not increase va_seq. It is atomic if
6575          * we haven't changed the directory, but if it has changed
6576          * we don't know what changed it.
6577          */
6578         if (!created) {
6579                 if (bva.va_seq && iva.va_seq &&
6580                     bva.va_seq == iva.va_seq)
6581                         cinfo->atomic = TRUE;
6582                 else
6583                         cinfo->atomic = FALSE;
6584                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6585         } else {
6586                 /*
6587                  * The entry was created, we need to sync the
6588                  * directory metadata.
6589                  */
6590                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6591 
6592                 /*
6593                  * Get "after" change value, if it fails, simply return the
6594                  * before value.
6595                  */
6596                 ava.va_mask = AT_CTIME|AT_SEQ;
6597                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6598                         ava.va_ctime = bva.va_ctime;
6599                         ava.va_seq = 0;
6600                 }
6601 
6602                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6603 
6604                 /*
6605                  * The cinfo->atomic = TRUE only if we have
6606                  * non-zero va_seq's, and it has incremented by exactly one
6607                  * during the create_vnode and it didn't
6608                  * change during the VOP_FSYNC.
6609                  */
6610                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6611                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6612                         cinfo->atomic = TRUE;
6613                 else
6614                         cinfo->atomic = FALSE;
6615         }
6616 
6617         /* Check for mandatory locking and that the size gets set. */
6618         cva.va_mask = AT_MODE;
6619         if (setsize)
6620                 cva.va_mask |= AT_SIZE;
6621 
6622         /* Assume the worst */
6623         cs->mandlock = TRUE;
6624 
6625         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6626                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6627 
6628                 /*
6629                  * Truncate the file if necessary; this would be
6630                  * the case for create over an existing file.
6631                  */
6632 
6633                 if (trunc) {
6634                         int in_crit = 0;
6635                         rfs4_file_t *fp;
6636                         nfs4_srv_t *nsrv4;
6637                         bool_t create = FALSE;
6638 
6639                         /*
6640                          * We are writing over an existing file.
6641                          * Check to see if we need to recall a delegation.
6642                          */
6643                         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6644                         rfs4_hold_deleg_policy(nsrv4);
6645                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6646                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6647                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6648                                         rfs4_file_rele(fp);
6649                                         rfs4_rele_deleg_policy(nsrv4);
6650                                         VN_RELE(vp);
6651                                         *attrset = 0;
6652                                         return (NFS4ERR_DELAY);
6653                                 }
6654                                 rfs4_file_rele(fp);
6655                         }
6656                         rfs4_rele_deleg_policy(nsrv4);
6657 
6658                         if (nbl_need_check(vp)) {
6659                                 in_crit = 1;
6660 
6661                                 ASSERT(reqsize == 0);
6662 
6663                                 nbl_start_crit(vp, RW_READER);
6664                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6665                                     cva.va_size, 0, NULL)) {
6666                                         in_crit = 0;
6667                                         nbl_end_crit(vp);
6668                                         VN_RELE(vp);
6669                                         *attrset = 0;
6670                                         return (NFS4ERR_ACCESS);
6671                                 }
6672                         }
6673                         ct.cc_sysid = 0;
6674                         ct.cc_pid = 0;
6675                         ct.cc_caller_id = nfs4_srv_caller_id;
6676                         ct.cc_flags = CC_DONTBLOCK;
6677 
6678                         cva.va_mask = AT_SIZE;
6679                         cva.va_size = reqsize;
6680                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6681                         if (in_crit)
6682                                 nbl_end_crit(vp);
6683                 }
6684         }
6685 
6686         error = makefh4(&cs->fh, vp, cs->exi);
6687 
6688         /*
6689          * Force modified data and metadata out to stable storage.
6690          */
6691         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6692 
6693         if (error) {
6694                 VN_RELE(vp);
6695                 *attrset = 0;
6696                 return (puterrno4(error));
6697         }
6698 
6699         /* if parent dir is attrdir, set namedattr fh flag */
6700         if (dvp->v_flag & V_XATTRDIR)
6701                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6702 
6703         if (cs->vp)
6704                 VN_RELE(cs->vp);
6705 
6706         cs->vp = vp;
6707 
6708         /*
6709          * if we did not create the file, we will need to check
6710          * the access bits on the file
6711          */
6712 
6713         if (!created) {
6714                 if (setsize)
6715                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6716                 status = check_open_access(args->share_access, cs, req);
6717                 if (status != NFS4_OK)
6718                         *attrset = 0;
6719         }
6720         return (status);
6721 }
6722 
/*
 * rfs4_do_open: common back end shared by the OPEN claim-type handlers.
 *
 * Looks up the rfs4_file_t for cs->vp and the rfs4_state_t linking that
 * file to the open owner `oo' (both lookups are passed a create flag that
 * starts out TRUE), applies the requested share access/deny bits, deals
 * with a conflicting delegation, performs the VOP_OPEN() or open upgrade,
 * and finally fills in resp->stateid, resp->delegation (when
 * rfs4_grant_delegation() returns a delegation state) and resp->status.
 *
 *      cs              compound state; cs->vp and cs->fh name the file
 *      req             not used
 *      oo              open owner issuing the OPEN
 *      deleg           delegation request handed to rfs4_grant_delegation()
 *      access, deny    OPEN4_SHARE_ACCESS_* and OPEN4_SHARE_DENY_* bits
 *      resp            OPEN result being built
 *      deleg_cur       non-zero forces the open-upgrade path instead of
 *                      VOP_OPEN() (see the CLAIM_DELEGATE_CUR note below)
 *
 * Nothing is returned; the outcome is reported through resp->status.
 * Every failure path drops the file/state references obtained here and,
 * if screate is still TRUE, closes the state via rfs4_state_close().
 *
 * Lock order used below: sp->rs_dbe first, then fp->rf_dbe.
 */
/*ARGSUSED*/
static void
rfs4_do_open(struct compound_state *cs, struct svc_req *req,
    rfs4_openowner_t *oo, delegreq_t deleg,
    uint32_t access, uint32_t deny,
    OPEN4res *resp, int deleg_cur)
{
        /* XXX Currently not using req  */
        rfs4_state_t *sp;
        rfs4_file_t *fp;
        /* create flags for the state and file lookups, passed by reference */
        bool_t screate = TRUE;
        bool_t fcreate = TRUE;
        uint32_t open_a, share_a;
        uint32_t open_d, share_d;
        rfs4_deleg_state_t *dsp;
        sysid_t sysid;
        nfsstat4 status;
        caller_context_t ct;
        int fflags = 0;
        int recall = 0;
        int err;
        int first_open;

        /* get the file struct and hold a lock on it during initial open */
        fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
        if (fp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
                return;
        }

        sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
        if (sp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
                /* No need to keep any reference */
                rw_exit(&fp->rf_file_rwlock);
                rfs4_file_rele(fp);
                return;
        }

        /*
         * NOTE(review): from here on rf_file_rwlock is never released in
         * this function, unlike the sp == NULL path above.  Presumably
         * rfs4_findstate_by_owner_file() drops it on success -- confirm.
         */

        /* try to get the sysid before continuing */
        if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
                resp->status = status;
                rfs4_file_rele(fp);
                /* Not a fully formed open; "close" it */
                if (screate == TRUE)
                        rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                rfs4_state_rele(sp);
                return;
        }

        /* Calculate the fflags for this OPEN. */
        if (access & OPEN4_SHARE_ACCESS_READ)
                fflags |= FREAD;
        if (access & OPEN4_SHARE_ACCESS_WRITE)
                fflags |= FWRITE;

        rfs4_dbe_lock(sp->rs_dbe);

        /*
         * Calculate the new deny and access mode that this open is adding to
         * the file for this open owner;
         */
        open_d = (deny & ~sp->rs_open_deny);
        open_a = (access & ~sp->rs_open_access);

        /*
         * Calculate the new share access and share deny modes that this open
         * is adding to the file for this open owner;
         */
        share_a = (access & ~sp->rs_share_access);
        share_d = (deny & ~sp->rs_share_deny);

        /* No read or write access recorded yet means this is the first open */
        first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;

        /*
         * Check to see the client has already sent an open for this
         * open owner on this file with the same share/deny modes.
         * If so, we don't need to check for a conflict and we don't
         * need to add another shrlock.  If not, then we need to
         * check for conflicts in deny and access before checking for
         * conflicts in delegation.  We don't want to recall a
         * delegation based on an open that will eventually fail based
         * on shares modes.
         */

        if (share_a || share_d) {
                if ((err = rfs4_share(sp, access, deny)) != 0) {
                        rfs4_dbe_unlock(sp->rs_dbe);
                        resp->status = err;

                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        return;
                }
        }

        rfs4_dbe_lock(fp->rf_dbe);

        /*
         * Check to see if this file is delegated and if so, if a
         * recall needs to be done.
         */
        if (rfs4_check_recall(sp, access)) {
                /* Drop both locks across the recall and the delay */
                rfs4_dbe_unlock(fp->rf_dbe);
                rfs4_dbe_unlock(sp->rs_dbe);
                rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
                delay(NFS4_DELEGATION_CONFLICT_DELAY);
                rfs4_dbe_lock(sp->rs_dbe);

                /*
                 * NOTE(review): open_a, open_d, share_a, share_d and
                 * first_open were computed before the state lock was
                 * dropped and are not recomputed here -- confirm that
                 * this is intended.
                 */

                /* if state closed while lock was dropped */
                if (sp->rs_closed) {
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_OLD_STATEID;
                        return;
                }

                rfs4_dbe_lock(fp->rf_dbe);
                /* Let's see if the delegation was returned */
                if (rfs4_check_recall(sp, access)) {
                        /* Still conflicting: undo the share and ask for a retry */
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        rfs4_update_lease(sp->rs_owner->ro_client);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_DELAY;
                        return;
                }
        }
        /*
         * the share check passed and any delegation conflict has been
         * taken care of, now call vop_open.
         * if this is the first open then call vop_open with fflags.
         * if not, call vn_open_upgrade with just the upgrade flags.
         *
         * if the file has been opened already, it will have the current
         * access mode in the state struct.  if it has no share access, then
         * this is a new open.
         *
         * However, if this is open with CLAIM_DELEGATE_CUR, then don't
         * call VOP_OPEN(), just do the open upgrade.
         */
        if (first_open && !deleg_cur) {
                ct.cc_sysid = sysid;
                ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
                ct.cc_caller_id = nfs4_srv_caller_id;
                ct.cc_flags = CC_DONTBLOCK;
                err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
                if (err) {
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        /* check if a monitor detected a delegation conflict */
                        if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
                                resp->status = NFS4ERR_DELAY;
                        else
                                resp->status = NFS4ERR_SERVERFAULT;
                        return;
                }
        } else { /* open upgrade */
                /*
                 * calculate the fflags for the new mode that is being added
                 * by this upgrade.
                 */
                fflags = 0;
                if (open_a & OPEN4_SHARE_ACCESS_READ)
                        fflags |= FREAD;
                if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                        fflags |= FWRITE;
                vn_open_upgrade(cs->vp, fflags);
        }
        /* Record the (possibly widened) modes on the state ... */
        sp->rs_open_access |= access;
        sp->rs_open_deny |= deny;

        /* ... and bump the per-file counters only for bits newly added */
        if (open_d & OPEN4_SHARE_DENY_READ)
                fp->rf_deny_read++;
        if (open_d & OPEN4_SHARE_DENY_WRITE)
                fp->rf_deny_write++;
        fp->rf_share_deny |= deny;

        if (open_a & OPEN4_SHARE_ACCESS_READ)
                fp->rf_access_read++;
        if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                fp->rf_access_write++;
        fp->rf_share_access |= access;

        /*
         * Check for delegation here. if the deleg argument is not
         * DELEG_ANY, then this is a reclaim from a client and
         * we must honor the delegation requested. If necessary we can
         * set the recall flag.
         */

        dsp = rfs4_grant_delegation(deleg, sp, &recall);

        /* Remember in the compound state whether the file is write delegated */
        cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);

        next_stateid(&sp->rs_stateid);

        resp->stateid = sp->rs_stateid.stateid;

        rfs4_dbe_unlock(fp->rf_dbe);
        rfs4_dbe_unlock(sp->rs_dbe);

        if (dsp) {
                rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
                rfs4_deleg_state_rele(dsp);
        }

        rfs4_file_rele(fp);
        rfs4_state_rele(sp);

        resp->status = NFS4_OK;
}
6961 
6962 /*ARGSUSED*/
6963 static void
6964 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6965     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6966 {
6967         change_info4 *cinfo = &resp->cinfo;
6968         bitmap4 *attrset = &resp->attrset;
6969 
6970         if (args->opentype == OPEN4_NOCREATE)
6971                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6972                     req, cs, args->share_access, cinfo);
6973         else {
6974                 /* inhibit delegation grants during exclusive create */
6975 
6976                 if (args->mode == EXCLUSIVE4)
6977                         rfs4_disable_delegation();
6978 
6979                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6980                     oo->ro_client->rc_clientid);
6981         }
6982 
6983         if (resp->status == NFS4_OK) {
6984 
6985                 /* cs->vp cs->fh now reference the desired file */
6986 
6987                 rfs4_do_open(cs, req, oo,
6988                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6989                     args->share_access, args->share_deny, resp, 0);
6990 
6991                 /*
6992                  * If rfs4_createfile set attrset, we must
6993                  * clear this attrset before the response is copied.
6994                  */
6995                 if (resp->status != NFS4_OK && resp->attrset) {
6996                         resp->attrset = 0;
6997                 }
6998         }
6999         else
7000                 *cs->statusp = resp->status;
7001 
7002         if (args->mode == EXCLUSIVE4)
7003                 rfs4_enable_delegation();
7004 }
7005 
7006 /*ARGSUSED*/
7007 static void
7008 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7009     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7010 {
7011         change_info4 *cinfo = &resp->cinfo;
7012         vattr_t va;
7013         vtype_t v_type = cs->vp->v_type;
7014         int error = 0;
7015 
7016         /* Verify that we have a regular file */
7017         if (v_type != VREG) {
7018                 if (v_type == VDIR)
7019                         resp->status = NFS4ERR_ISDIR;
7020                 else if (v_type == VLNK)
7021                         resp->status = NFS4ERR_SYMLINK;
7022                 else
7023                         resp->status = NFS4ERR_INVAL;
7024                 return;
7025         }
7026 
7027         va.va_mask = AT_MODE|AT_UID;
7028         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7029         if (error) {
7030                 resp->status = puterrno4(error);
7031                 return;
7032         }
7033 
7034         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7035 
7036         /*
7037          * Check if we have access to the file, Note the the file
7038          * could have originally been open UNCHECKED or GUARDED
7039          * with mode bits that will now fail, but there is nothing
7040          * we can really do about that except in the case that the
7041          * owner of the file is the one requesting the open.
7042          */
7043         if (crgetuid(cs->cr) != va.va_uid) {
7044                 resp->status = check_open_access(args->share_access, cs, req);
7045                 if (resp->status != NFS4_OK) {
7046                         return;
7047                 }
7048         }
7049 
7050         /*
7051          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7052          */
7053         cinfo->before = 0;
7054         cinfo->after = 0;
7055         cinfo->atomic = FALSE;
7056 
7057         rfs4_do_open(cs, req, oo,
7058             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7059             args->share_access, args->share_deny, resp, 0);
7060 }
7061 
7062 static void
7063 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7064     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7065 {
7066         int error;
7067         nfsstat4 status;
7068         stateid4 stateid =
7069             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7070         rfs4_deleg_state_t *dsp;
7071 
7072         /*
7073          * Find the state info from the stateid and confirm that the
7074          * file is delegated.  If the state openowner is the same as
7075          * the supplied openowner we're done. If not, get the file
7076          * info from the found state info. Use that file info to
7077          * create the state for this lock owner. Note solaris doen't
7078          * really need the pathname to find the file. We may want to
7079          * lookup the pathname and make sure that the vp exist and
7080          * matches the vp in the file structure. However it is
7081          * possible that the pathname nolonger exists (local process
7082          * unlinks the file), so this may not be that useful.
7083          */
7084 
7085         status = rfs4_get_deleg_state(&stateid, &dsp);
7086         if (status != NFS4_OK) {
7087                 resp->status = status;
7088                 return;
7089         }
7090 
7091         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7092 
7093         /*
7094          * New lock owner, create state. Since this was probably called
7095          * in response to a CB_RECALL we set deleg to DELEG_NONE
7096          */
7097 
7098         ASSERT(cs->vp != NULL);
7099         VN_RELE(cs->vp);
7100         VN_HOLD(dsp->rds_finfo->rf_vp);
7101         cs->vp = dsp->rds_finfo->rf_vp;
7102 
7103         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7104                 rfs4_deleg_state_rele(dsp);
7105                 *cs->statusp = resp->status = puterrno4(error);
7106                 return;
7107         }
7108 
7109         /* Mark progress for delegation returns */
7110         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7111         rfs4_deleg_state_rele(dsp);
7112         rfs4_do_open(cs, req, oo, DELEG_NONE,
7113             args->share_access, args->share_deny, resp, 1);
7114 }
7115 
7116 /*ARGSUSED*/
7117 static void
7118 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7119     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7120 {
7121         /*
7122          * Lookup the pathname, it must already exist since this file
7123          * was delegated.
7124          *
7125          * Find the file and state info for this vp and open owner pair.
7126          *      check that they are in fact delegated.
7127          *      check that the state access and deny modes are the same.
7128          *
7129          * Return the delgation possibly seting the recall flag.
7130          */
7131         rfs4_file_t *fp;
7132         rfs4_state_t *sp;
7133         bool_t create = FALSE;
7134         bool_t dcreate = FALSE;
7135         rfs4_deleg_state_t *dsp;
7136         nfsace4 *ace;
7137 
7138         /* Note we ignore oflags */
7139         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7140             req, cs, args->share_access, &resp->cinfo);
7141 
7142         if (resp->status != NFS4_OK) {
7143                 return;
7144         }
7145 
7146         /* get the file struct and hold a lock on it during initial open */
7147         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7148         if (fp == NULL) {
7149                 resp->status = NFS4ERR_RESOURCE;
7150                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7151                 return;
7152         }
7153 
7154         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7155         if (sp == NULL) {
7156                 resp->status = NFS4ERR_SERVERFAULT;
7157                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7158                 rw_exit(&fp->rf_file_rwlock);
7159                 rfs4_file_rele(fp);
7160                 return;
7161         }
7162 
7163         rfs4_dbe_lock(sp->rs_dbe);
7164         rfs4_dbe_lock(fp->rf_dbe);
7165         if (args->share_access != sp->rs_share_access ||
7166             args->share_deny != sp->rs_share_deny ||
7167             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7168                 NFS4_DEBUG(rfs4_debug,
7169                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7170                 rfs4_dbe_unlock(fp->rf_dbe);
7171                 rfs4_dbe_unlock(sp->rs_dbe);
7172                 rfs4_file_rele(fp);
7173                 rfs4_state_rele(sp);
7174                 resp->status = NFS4ERR_SERVERFAULT;
7175                 return;
7176         }
7177         rfs4_dbe_unlock(fp->rf_dbe);
7178         rfs4_dbe_unlock(sp->rs_dbe);
7179 
7180         dsp = rfs4_finddeleg(sp, &dcreate);
7181         if (dsp == NULL) {
7182                 rfs4_state_rele(sp);
7183                 rfs4_file_rele(fp);
7184                 resp->status = NFS4ERR_SERVERFAULT;
7185                 return;
7186         }
7187 
7188         next_stateid(&sp->rs_stateid);
7189 
7190         resp->stateid = sp->rs_stateid.stateid;
7191 
7192         resp->delegation.delegation_type = dsp->rds_dtype;
7193 
7194         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7195                 open_read_delegation4 *rv =
7196                     &resp->delegation.open_delegation4_u.read;
7197 
7198                 rv->stateid = dsp->rds_delegid.stateid;
7199                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7200                 ace = &rv->permissions;
7201         } else {
7202                 open_write_delegation4 *rv =
7203                     &resp->delegation.open_delegation4_u.write;
7204 
7205                 rv->stateid = dsp->rds_delegid.stateid;
7206                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7207                 ace = &rv->permissions;
7208                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7209                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7210         }
7211 
7212         /* XXX For now */
7213         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7214         ace->flag = 0;
7215         ace->access_mask = 0;
7216         ace->who.utf8string_len = 0;
7217         ace->who.utf8string_val = 0;
7218 
7219         rfs4_deleg_state_rele(dsp);
7220         rfs4_state_rele(sp);
7221         rfs4_file_rele(fp);
7222 }
7223 
/*
 * Verdicts produced by the seqid checking routines.
 */
typedef enum {
        NFS4_CHKSEQ_OKAY = 0,   /* request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY,     /* same seqid and same op as the saved reply */
        NFS4_CHKSEQ_BAD         /* any other seqid, or same seqid/other op */
} rfs4_chkseq_t;
7229 
7230 /*
7231  * Generic function for sequence number checks.
7232  */
7233 static rfs4_chkseq_t
7234 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7235     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7236 {
7237         /* Same sequence ids and matching operations? */
7238         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7239                 if (copyres == TRUE) {
7240                         rfs4_free_reply(resop);
7241                         rfs4_copy_reply(resop, lastop);
7242                 }
7243                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7244                     "Replayed SEQID %d\n", seqid));
7245                 return (NFS4_CHKSEQ_REPLAY);
7246         }
7247 
7248         /* If the incoming sequence is not the next expected then it is bad */
7249         if (rqst_seq != seqid + 1) {
7250                 if (rqst_seq == seqid) {
7251                         NFS4_DEBUG(rfs4_debug,
7252                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7253                             "but last op was %d current op is %d\n",
7254                             lastop->resop, resop->resop));
7255                         return (NFS4_CHKSEQ_BAD);
7256                 }
7257                 NFS4_DEBUG(rfs4_debug,
7258                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7259                     rqst_seq, seqid));
7260                 return (NFS4_CHKSEQ_BAD);
7261         }
7262 
7263         /* Everything okay -- next expected */
7264         return (NFS4_CHKSEQ_OKAY);
7265 }
7266 
7267 
7268 static rfs4_chkseq_t
7269 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7270 {
7271         rfs4_chkseq_t rc;
7272 
7273         rfs4_dbe_lock(op->ro_dbe);
7274         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7275             TRUE);
7276         rfs4_dbe_unlock(op->ro_dbe);
7277 
7278         if (rc == NFS4_CHKSEQ_OKAY)
7279                 rfs4_update_lease(op->ro_client);
7280 
7281         return (rc);
7282 }
7283 
7284 static rfs4_chkseq_t
7285 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7286 {
7287         rfs4_chkseq_t rc;
7288 
7289         rfs4_dbe_lock(op->ro_dbe);
7290         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7291             olo_seqid, resop, FALSE);
7292         rfs4_dbe_unlock(op->ro_dbe);
7293 
7294         return (rc);
7295 }
7296 
7297 static rfs4_chkseq_t
7298 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7299 {
7300         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7301 
7302         rfs4_dbe_lock(lsp->rls_dbe);
7303         if (!lsp->rls_skip_seqid_check)
7304                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7305                     resop, TRUE);
7306         rfs4_dbe_unlock(lsp->rls_dbe);
7307 
7308         return (rc);
7309 }
7310 
7311 static void
7312 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7313     struct svc_req *req, struct compound_state *cs)
7314 {
7315         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7316         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7317         open_owner4 *owner = &args->owner;
7318         open_claim_type4 claim = args->claim;
7319         rfs4_client_t *cp;
7320         rfs4_openowner_t *oo;
7321         bool_t create;
7322         bool_t replay = FALSE;
7323         int can_reclaim;
7324 
7325         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7326             OPEN4args *, args);
7327 
7328         if (cs->vp == NULL) {
7329                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7330                 goto end;
7331         }
7332 
7333         /*
7334          * Need to check clientid and lease expiration first based on
7335          * error ordering and incrementing sequence id.
7336          */
7337         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7338         if (cp == NULL) {
7339                 *cs->statusp = resp->status =
7340                     rfs4_check_clientid(&owner->clientid, 0);
7341                 goto end;
7342         }
7343 
7344         if (rfs4_lease_expired(cp)) {
7345                 rfs4_client_close(cp);
7346                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7347                 goto end;
7348         }
7349         can_reclaim = cp->rc_can_reclaim;
7350 
7351         /*
7352          * Find the open_owner for use from this point forward.  Take
7353          * care in updating the sequence id based on the type of error
7354          * being returned.
7355          */
7356 retry:
7357         create = TRUE;
7358         oo = rfs4_findopenowner(owner, &create, args->seqid);
7359         if (oo == NULL) {
7360                 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7361                 rfs4_client_rele(cp);
7362                 goto end;
7363         }
7364 
7365         /* Hold off access to the sequence space while the open is done */
7366         rfs4_sw_enter(&oo->ro_sw);
7367 
7368         /*
7369          * If the open_owner existed before at the server, then check
7370          * the sequence id.
7371          */
7372         if (!create && !oo->ro_postpone_confirm) {
7373                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7374                 case NFS4_CHKSEQ_BAD:
7375                         if ((args->seqid > oo->ro_open_seqid) &&
7376                             oo->ro_need_confirm) {
7377                                 rfs4_free_opens(oo, TRUE, FALSE);
7378                                 rfs4_sw_exit(&oo->ro_sw);
7379                                 rfs4_openowner_rele(oo);
7380                                 goto retry;
7381                         }
7382                         resp->status = NFS4ERR_BAD_SEQID;
7383                         goto out;
7384                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7385                         replay = TRUE;
7386                         goto out;
7387                 default:
7388                         break;
7389                 }
7390 
7391                 /*
7392                  * Sequence was ok and open owner exists
7393                  * check to see if we have yet to see an
7394                  * open_confirm.
7395                  */
7396                 if (oo->ro_need_confirm) {
7397                         rfs4_free_opens(oo, TRUE, FALSE);
7398                         rfs4_sw_exit(&oo->ro_sw);
7399                         rfs4_openowner_rele(oo);
7400                         goto retry;
7401                 }
7402         }
7403         /* Grace only applies to regular-type OPENs */
7404         if (rfs4_clnt_in_grace(cp) &&
7405             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7406                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7407                 goto out;
7408         }
7409 
7410         /*
7411          * If previous state at the server existed then can_reclaim
7412          * will be set. If not reply NFS4ERR_NO_GRACE to the
7413          * client.
7414          */
7415         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7416                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7417                 goto out;
7418         }
7419 
7420 
7421         /*
7422          * Reject the open if the client has missed the grace period
7423          */
7424         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7425                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7426                 goto out;
7427         }
7428 
7429         /* Couple of up-front bookkeeping items */
7430         if (oo->ro_need_confirm) {
7431                 /*
7432                  * If this is a reclaim OPEN then we should not ask
7433                  * for a confirmation of the open_owner per the
7434                  * protocol specification.
7435                  */
7436                 if (claim == CLAIM_PREVIOUS)
7437                         oo->ro_need_confirm = FALSE;
7438                 else
7439                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7440         }
7441         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7442 
7443         /*
7444          * If there is an unshared filesystem mounted on this vnode,
7445          * do not allow to open/create in this directory.
7446          */
7447         if (vn_ismntpt(cs->vp)) {
7448                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7449                 goto out;
7450         }
7451 
7452         /*
7453          * access must READ, WRITE, or BOTH.  No access is invalid.
7454          * deny can be READ, WRITE, BOTH, or NONE.
7455          * bits not defined for access/deny are invalid.
7456          */
7457         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7458             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7459             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7460                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7461                 goto out;
7462         }
7463 
7464 
7465         /*
7466          * make sure attrset is zero before response is built.
7467          */
7468         resp->attrset = 0;
7469 
7470         switch (claim) {
7471         case CLAIM_NULL:
7472                 rfs4_do_opennull(cs, req, args, oo, resp);
7473                 break;
7474         case CLAIM_PREVIOUS:
7475                 rfs4_do_openprev(cs, req, args, oo, resp);
7476                 break;
7477         case CLAIM_DELEGATE_CUR:
7478                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7479                 break;
7480         case CLAIM_DELEGATE_PREV:
7481                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7482                 break;
7483         default:
7484                 resp->status = NFS4ERR_INVAL;
7485                 break;
7486         }
7487 
7488 out:
7489         rfs4_client_rele(cp);
7490 
7491         /* Catch sequence id handling here to make it a little easier */
7492         switch (resp->status) {
7493         case NFS4ERR_BADXDR:
7494         case NFS4ERR_BAD_SEQID:
7495         case NFS4ERR_BAD_STATEID:
7496         case NFS4ERR_NOFILEHANDLE:
7497         case NFS4ERR_RESOURCE:
7498         case NFS4ERR_STALE_CLIENTID:
7499         case NFS4ERR_STALE_STATEID:
7500                 /*
7501                  * The protocol states that if any of these errors are
7502                  * being returned, the sequence id should not be
7503                  * incremented.  Any other return requires an
7504                  * increment.
7505                  */
7506                 break;
7507         default:
7508                 /* Always update the lease in this case */
7509                 rfs4_update_lease(oo->ro_client);
7510 
7511                 /* Regular response - copy the result */
7512                 if (!replay)
7513                         rfs4_update_open_resp(oo, resop, &cs->fh);
7514 
7515                 /*
7516                  * REPLAY case: Only if the previous response was OK
7517                  * do we copy the filehandle.  If not OK, no
7518                  * filehandle to copy.
7519                  */
7520                 if (replay == TRUE &&
7521                     resp->status == NFS4_OK &&
7522                     oo->ro_reply_fh.nfs_fh4_val) {
7523                         /*
7524                          * If this is a replay, we must restore the
7525                          * current filehandle/vp to that of what was
7526                          * returned originally.  Try our best to do
7527                          * it.
7528                          */
7529                         nfs_fh4_fmt_t *fh_fmtp =
7530                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7531 
7532                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7533                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7534 
7535                         if (cs->exi == NULL) {
7536                                 resp->status = NFS4ERR_STALE;
7537                                 goto finish;
7538                         }
7539 
7540                         VN_RELE(cs->vp);
7541 
7542                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7543                             &resp->status);
7544 
7545                         if (cs->vp == NULL)
7546                                 goto finish;
7547 
7548                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7549                 }
7550 
7551                 /*
7552                  * If this was a replay, no need to update the
7553                  * sequence id. If the open_owner was not created on
7554                  * this pass, then update.  The first use of an
7555                  * open_owner will not bump the sequence id.
7556                  */
7557                 if (replay == FALSE && !create)
7558                         rfs4_update_open_sequence(oo);
7559                 /*
7560                  * If the client is receiving an error and the
7561                  * open_owner needs to be confirmed, there is no way
7562                  * to notify the client of this fact ignoring the fact
7563                  * that the server has no method of returning a
7564                  * stateid to confirm.  Therefore, the server needs to
7565                  * mark this open_owner in a way as to avoid the
7566                  * sequence id checking the next time the client uses
7567                  * this open_owner.
7568                  */
7569                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7570                         oo->ro_postpone_confirm = TRUE;
7571                 /*
7572                  * If OK response then clear the postpone flag and
7573                  * reset the sequence id to keep in sync with the
7574                  * client.
7575                  */
7576                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7577                         oo->ro_postpone_confirm = FALSE;
7578                         oo->ro_open_seqid = args->seqid;
7579                 }
7580                 break;
7581         }
7582 
7583 finish:
7584         *cs->statusp = resp->status;
7585 
7586         rfs4_sw_exit(&oo->ro_sw);
7587         rfs4_openowner_rele(oo);
7588 
7589 end:
7590         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7591             OPEN4res *, resp);
7592 }
7593 
7594 /*ARGSUSED*/
7595 void
7596 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7597     struct svc_req *req, struct compound_state *cs)
7598 {
7599         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7600         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7601         rfs4_state_t *sp;
7602         nfsstat4 status;
7603 
7604         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7605             OPEN_CONFIRM4args *, args);
7606 
7607         if (cs->vp == NULL) {
7608                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7609                 goto out;
7610         }
7611 
7612         if (cs->vp->v_type != VREG) {
7613                 *cs->statusp = resp->status =
7614                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7615                 return;
7616         }
7617 
7618         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7619         if (status != NFS4_OK) {
7620                 *cs->statusp = resp->status = status;
7621                 goto out;
7622         }
7623 
7624         /* Ensure specified filehandle matches */
7625         if (cs->vp != sp->rs_finfo->rf_vp) {
7626                 rfs4_state_rele(sp);
7627                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7628                 goto out;
7629         }
7630 
7631         /* hold off other access to open_owner while we tinker */
7632         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7633 
7634         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7635         case NFS4_CHECK_STATEID_OKAY:
7636                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7637                     resop) != 0) {
7638                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7639                         break;
7640                 }
7641                 /*
7642                  * If it is the appropriate stateid and determined to
7643                  * be "OKAY" then this means that the stateid does not
7644                  * need to be confirmed and the client is in error for
7645                  * sending an OPEN_CONFIRM.
7646                  */
7647                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7648                 break;
7649         case NFS4_CHECK_STATEID_OLD:
7650                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7651                 break;
7652         case NFS4_CHECK_STATEID_BAD:
7653                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7654                 break;
7655         case NFS4_CHECK_STATEID_EXPIRED:
7656                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7657                 break;
7658         case NFS4_CHECK_STATEID_CLOSED:
7659                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7660                 break;
7661         case NFS4_CHECK_STATEID_REPLAY:
7662                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7663                     resop)) {
7664                 case NFS4_CHKSEQ_OKAY:
7665                         /*
7666                          * This is replayed stateid; if seqid matches
7667                          * next expected, then client is using wrong seqid.
7668                          */
7669                         /* fall through */
7670                 case NFS4_CHKSEQ_BAD:
7671                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7672                         break;
7673                 case NFS4_CHKSEQ_REPLAY:
7674                         /*
7675                          * Note this case is the duplicate case so
7676                          * resp->status is already set.
7677                          */
7678                         *cs->statusp = resp->status;
7679                         rfs4_update_lease(sp->rs_owner->ro_client);
7680                         break;
7681                 }
7682                 break;
7683         case NFS4_CHECK_STATEID_UNCONFIRMED:
7684                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7685                     resop) != NFS4_CHKSEQ_OKAY) {
7686                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7687                         break;
7688                 }
7689                 *cs->statusp = resp->status = NFS4_OK;
7690 
7691                 next_stateid(&sp->rs_stateid);
7692                 resp->open_stateid = sp->rs_stateid.stateid;
7693                 sp->rs_owner->ro_need_confirm = FALSE;
7694                 rfs4_update_lease(sp->rs_owner->ro_client);
7695                 rfs4_update_open_sequence(sp->rs_owner);
7696                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7697                 break;
7698         default:
7699                 ASSERT(FALSE);
7700                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7701                 break;
7702         }
7703         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7704         rfs4_state_rele(sp);
7705 
7706 out:
7707         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7708             OPEN_CONFIRM4res *, resp);
7709 }
7710 
7711 /*ARGSUSED*/
7712 void
7713 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7714     struct svc_req *req, struct compound_state *cs)
7715 {
7716         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7717         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7718         uint32_t access = args->share_access;
7719         uint32_t deny = args->share_deny;
7720         nfsstat4 status;
7721         rfs4_state_t *sp;
7722         rfs4_file_t *fp;
7723         int fflags = 0;
7724 
7725         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7726             OPEN_DOWNGRADE4args *, args);
7727 
7728         if (cs->vp == NULL) {
7729                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7730                 goto out;
7731         }
7732 
7733         if (cs->vp->v_type != VREG) {
7734                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7735                 return;
7736         }
7737 
7738         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7739         if (status != NFS4_OK) {
7740                 *cs->statusp = resp->status = status;
7741                 goto out;
7742         }
7743 
7744         /* Ensure specified filehandle matches */
7745         if (cs->vp != sp->rs_finfo->rf_vp) {
7746                 rfs4_state_rele(sp);
7747                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7748                 goto out;
7749         }
7750 
7751         /* hold off other access to open_owner while we tinker */
7752         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7753 
7754         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7755         case NFS4_CHECK_STATEID_OKAY:
7756                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7757                     resop) != NFS4_CHKSEQ_OKAY) {
7758                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7759                         goto end;
7760                 }
7761                 break;
7762         case NFS4_CHECK_STATEID_OLD:
7763                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7764                 goto end;
7765         case NFS4_CHECK_STATEID_BAD:
7766                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7767                 goto end;
7768         case NFS4_CHECK_STATEID_EXPIRED:
7769                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7770                 goto end;
7771         case NFS4_CHECK_STATEID_CLOSED:
7772                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7773                 goto end;
7774         case NFS4_CHECK_STATEID_UNCONFIRMED:
7775                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7776                 goto end;
7777         case NFS4_CHECK_STATEID_REPLAY:
7778                 /* Check the sequence id for the open owner */
7779                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7780                     resop)) {
7781                 case NFS4_CHKSEQ_OKAY:
7782                         /*
7783                          * This is replayed stateid; if seqid matches
7784                          * next expected, then client is using wrong seqid.
7785                          */
7786                         /* fall through */
7787                 case NFS4_CHKSEQ_BAD:
7788                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7789                         goto end;
7790                 case NFS4_CHKSEQ_REPLAY:
7791                         /*
7792                          * Note this case is the duplicate case so
7793                          * resp->status is already set.
7794                          */
7795                         *cs->statusp = resp->status;
7796                         rfs4_update_lease(sp->rs_owner->ro_client);
7797                         goto end;
7798                 }
7799                 break;
7800         default:
7801                 ASSERT(FALSE);
7802                 break;
7803         }
7804 
7805         rfs4_dbe_lock(sp->rs_dbe);
7806         /*
7807          * Check that the new access modes and deny modes are valid.
7808          * Check that no invalid bits are set.
7809          */
7810         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7811             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7812                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7813                 rfs4_update_open_sequence(sp->rs_owner);
7814                 rfs4_dbe_unlock(sp->rs_dbe);
7815                 goto end;
7816         }
7817 
7818         /*
7819          * The new modes must be a subset of the current modes and
7820          * the access must specify at least one mode. To test that
7821          * the new mode is a subset of the current modes we bitwise
7822          * AND them together and check that the result equals the new
7823          * mode. For example:
7824          * New mode, access == R and current mode, sp->rs_open_access  == RW
7825          * access & sp->rs_open_access == R == access, so the new access mode
7826          * is valid. Consider access == RW, sp->rs_open_access = R
7827          * access & sp->rs_open_access == R != access, so the new access mode
7828          * is invalid.
7829          */
7830         if ((access & sp->rs_open_access) != access ||
7831             (deny & sp->rs_open_deny) != deny ||
7832             (access &
7833             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7834                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7835                 rfs4_update_open_sequence(sp->rs_owner);
7836                 rfs4_dbe_unlock(sp->rs_dbe);
7837                 goto end;
7838         }
7839 
7840         /*
7841          * Release any share locks associated with this stateID.
7842          * Strictly speaking, this violates the spec because the
7843          * spec effectively requires that open downgrade be atomic.
7844          * At present, fs_shrlock does not have this capability.
7845          */
7846         (void) rfs4_unshare(sp);
7847 
7848         status = rfs4_share(sp, access, deny);
7849         if (status != NFS4_OK) {
7850                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7851                 rfs4_update_open_sequence(sp->rs_owner);
7852                 rfs4_dbe_unlock(sp->rs_dbe);
7853                 goto end;
7854         }
7855 
7856         fp = sp->rs_finfo;
7857         rfs4_dbe_lock(fp->rf_dbe);
7858 
7859         /*
7860          * If the current mode has deny read and the new mode
7861          * does not, decrement the number of deny read mode bits
7862          * and if it goes to zero turn off the deny read bit
7863          * on the file.
7864          */
7865         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7866             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7867                 fp->rf_deny_read--;
7868                 if (fp->rf_deny_read == 0)
7869                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7870         }
7871 
7872         /*
7873          * If the current mode has deny write and the new mode
7874          * does not, decrement the number of deny write mode bits
7875          * and if it goes to zero turn off the deny write bit
7876          * on the file.
7877          */
7878         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7879             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7880                 fp->rf_deny_write--;
7881                 if (fp->rf_deny_write == 0)
7882                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7883         }
7884 
7885         /*
7886          * If the current mode has access read and the new mode
7887          * does not, decrement the number of access read mode bits
7888          * and if it goes to zero turn off the access read bit
7889          * on the file.  set fflags to FREAD for the call to
7890          * vn_open_downgrade().
7891          */
7892         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7893             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7894                 fp->rf_access_read--;
7895                 if (fp->rf_access_read == 0)
7896                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7897                 fflags |= FREAD;
7898         }
7899 
7900         /*
7901          * If the current mode has access write and the new mode
7902          * does not, decrement the number of access write mode bits
7903          * and if it goes to zero turn off the access write bit
7904          * on the file.  set fflags to FWRITE for the call to
7905          * vn_open_downgrade().
7906          */
7907         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7908             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7909                 fp->rf_access_write--;
7910                 if (fp->rf_access_write == 0)
7911                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7912                 fflags |= FWRITE;
7913         }
7914 
7915         /* Check that the file is still accessible */
7916         ASSERT(fp->rf_share_access);
7917 
7918         rfs4_dbe_unlock(fp->rf_dbe);
7919 
7920         /* now set the new open access and deny modes */
7921         sp->rs_open_access = access;
7922         sp->rs_open_deny = deny;
7923 
7924         /*
7925          * we successfully downgraded the share lock, now we need to downgrade
7926          * the open. it is possible that the downgrade was only for a deny
7927          * mode and we have nothing else to do.
7928          */
7929         if ((fflags & (FREAD|FWRITE)) != 0)
7930                 vn_open_downgrade(cs->vp, fflags);
7931 
7932         /* Update the stateid */
7933         next_stateid(&sp->rs_stateid);
7934         resp->open_stateid = sp->rs_stateid.stateid;
7935 
7936         rfs4_dbe_unlock(sp->rs_dbe);
7937 
7938         *cs->statusp = resp->status = NFS4_OK;
7939         /* Update the lease */
7940         rfs4_update_lease(sp->rs_owner->ro_client);
7941         /* And the sequence */
7942         rfs4_update_open_sequence(sp->rs_owner);
7943         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7944 
7945 end:
7946         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7947         rfs4_state_rele(sp);
7948 out:
7949         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7950             OPEN_DOWNGRADE4res *, resp);
7951 }
7952 
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * buffer s1.  The buffer itself need not be NUL-terminated.
 *
 * Returns a pointer to the first position in s1 at which all of s2's
 * bytes (excluding its terminating NUL) match, or NULL when s2 does not
 * fit or does not occur within the n-byte window.  An empty s2 matches
 * at s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t needle_len = strlen(s2);
        size_t remaining;
        char *cursor;

        /* Slide forward while the needle can still fit in what is left. */
        for (cursor = (char *)s1, remaining = n; remaining >= needle_len;
            cursor++, remaining--) {
                if (bcmp(cursor, s2, needle_len) != 0)
                        continue;
                return (cursor);
        }

        return (NULL);
}
7968 
7969 /*
7970  * The logic behind this function is detailed in the NFSv4 RFC in the
7971  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7972  * that section for explicit guidance to server behavior for
7973  * SETCLIENTID.
7974  */
7975 void
7976 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7977     struct svc_req *req, struct compound_state *cs)
7978 {
7979         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7980         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7981         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7982         rfs4_clntip_t *ci;
7983         bool_t create;
7984         char *addr, *netid;
7985         int len;
7986 
7987         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7988             SETCLIENTID4args *, args);
7989 retry:
7990         newcp = cp_confirmed = cp_unconfirmed = NULL;
7991 
7992         /*
7993          * Save the caller's IP address
7994          */
7995         args->client.cl_addr =
7996             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7997 
7998         /*
7999          * Record if it is a Solaris client that cannot handle referrals.
8000          */
8001         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8002             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8003                 /* Add a "yes, it's downrev" record */
8004                 create = TRUE;
8005                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8006                 ASSERT(ci != NULL);
8007                 rfs4_dbe_rele(ci->ri_dbe);
8008         } else {
8009                 /* Remove any previous record */
8010                 rfs4_invalidate_clntip(args->client.cl_addr);
8011         }
8012 
8013         /*
8014          * In search of an EXISTING client matching the incoming
8015          * request to establish a new client identifier at the server
8016          */
8017         create = TRUE;
8018         cp = rfs4_findclient(&args->client, &create, NULL);
8019 
8020         /* Should never happen */
8021         ASSERT(cp != NULL);
8022 
8023         if (cp == NULL) {
8024                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8025                 goto out;
8026         }
8027 
8028         /*
8029          * Easiest case. Client identifier is newly created and is
8030          * unconfirmed.  Also note that for this case, no other
8031          * entries exist for the client identifier.  Nothing else to
8032          * check.  Just setup the response and respond.
8033          */
8034         if (create) {
8035                 *cs->statusp = res->status = NFS4_OK;
8036                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8037                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8038                     cp->rc_confirm_verf;
8039                 /* Setup callback information; CB_NULL confirmation later */
8040                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8041 
8042                 rfs4_client_rele(cp);
8043                 goto out;
8044         }
8045 
8046         /*
8047          * An existing, confirmed client may exist but it may not have
8048          * been active for at least one lease period.  If so, then
8049          * "close" the client and create a new client identifier
8050          */
8051         if (rfs4_lease_expired(cp)) {
8052                 rfs4_client_close(cp);
8053                 goto retry;
8054         }
8055 
8056         if (cp->rc_need_confirm == TRUE)
8057                 cp_unconfirmed = cp;
8058         else
8059                 cp_confirmed = cp;
8060 
8061         cp = NULL;
8062 
8063         /*
8064          * We have a confirmed client, now check for an
8065          * unconfimred entry
8066          */
8067         if (cp_confirmed) {
8068                 /* If creds don't match then client identifier is inuse */
8069                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8070                         rfs4_cbinfo_t *cbp;
8071                         /*
8072                          * Some one else has established this client
8073                          * id. Try and say * who they are. We will use
8074                          * the call back address supplied by * the
8075                          * first client.
8076                          */
8077                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8078 
8079                         addr = netid = NULL;
8080 
8081                         cbp = &cp_confirmed->rc_cbinfo;
8082                         if (cbp->cb_callback.cb_location.r_addr &&
8083                             cbp->cb_callback.cb_location.r_netid) {
8084                                 cb_client4 *cbcp = &cbp->cb_callback;
8085 
8086                                 len = strlen(cbcp->cb_location.r_addr)+1;
8087                                 addr = kmem_alloc(len, KM_SLEEP);
8088                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8089                                 len = strlen(cbcp->cb_location.r_netid)+1;
8090                                 netid = kmem_alloc(len, KM_SLEEP);
8091                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8092                         }
8093 
8094                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8095                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8096 
8097                         rfs4_client_rele(cp_confirmed);
8098                 }
8099 
8100                 /*
8101                  * Confirmed, creds match, and verifier matches; must
8102                  * be an update of the callback info
8103                  */
8104                 if (cp_confirmed->rc_nfs_client.verifier ==
8105                     args->client.verifier) {
8106                         /* Setup callback information */
8107                         rfs4_client_setcb(cp_confirmed, &args->callback,
8108                             args->callback_ident);
8109 
8110                         /* everything okay -- move ahead */
8111                         *cs->statusp = res->status = NFS4_OK;
8112                         res->SETCLIENTID4res_u.resok4.clientid =
8113                             cp_confirmed->rc_clientid;
8114 
8115                         /* update the confirm_verifier and return it */
8116                         rfs4_client_scv_next(cp_confirmed);
8117                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8118                             cp_confirmed->rc_confirm_verf;
8119 
8120                         rfs4_client_rele(cp_confirmed);
8121                         goto out;
8122                 }
8123 
8124                 /*
8125                  * Creds match but the verifier doesn't.  Must search
8126                  * for an unconfirmed client that would be replaced by
8127                  * this request.
8128                  */
8129                 create = FALSE;
8130                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8131                     cp_confirmed);
8132         }
8133 
8134         /*
8135          * At this point, we have taken care of the brand new client
8136          * struct, INUSE case, update of an existing, and confirmed
8137          * client struct.
8138          */
8139 
8140         /*
8141          * check to see if things have changed while we originally
8142          * picked up the client struct.  If they have, then return and
8143          * retry the processing of this SETCLIENTID request.
8144          */
8145         if (cp_unconfirmed) {
8146                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8147                 if (!cp_unconfirmed->rc_need_confirm) {
8148                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8149                         rfs4_client_rele(cp_unconfirmed);
8150                         if (cp_confirmed)
8151                                 rfs4_client_rele(cp_confirmed);
8152                         goto retry;
8153                 }
8154                 /* do away with the old unconfirmed one */
8155                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8156                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8157                 rfs4_client_rele(cp_unconfirmed);
8158                 cp_unconfirmed = NULL;
8159         }
8160 
8161         /*
8162          * This search will temporarily hide the confirmed client
8163          * struct while a new client struct is created as the
8164          * unconfirmed one.
8165          */
8166         create = TRUE;
8167         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8168 
8169         ASSERT(newcp != NULL);
8170 
8171         if (newcp == NULL) {
8172                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8173                 rfs4_client_rele(cp_confirmed);
8174                 goto out;
8175         }
8176 
8177         /*
8178          * If one was not created, then a similar request must be in
8179          * process so release and start over with this one
8180          */
8181         if (create != TRUE) {
8182                 rfs4_client_rele(newcp);
8183                 if (cp_confirmed)
8184                         rfs4_client_rele(cp_confirmed);
8185                 goto retry;
8186         }
8187 
8188         *cs->statusp = res->status = NFS4_OK;
8189         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8190         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8191             newcp->rc_confirm_verf;
8192         /* Setup callback information; CB_NULL confirmation later */
8193         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8194 
8195         newcp->rc_cp_confirmed = cp_confirmed;
8196 
8197         rfs4_client_rele(newcp);
8198 
8199 out:
8200         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8201             SETCLIENTID4res *, res);
8202 }
8203 
8204 /*ARGSUSED*/
8205 void
8206 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8207     struct svc_req *req, struct compound_state *cs)
8208 {
8209         SETCLIENTID_CONFIRM4args *args =
8210             &argop->nfs_argop4_u.opsetclientid_confirm;
8211         SETCLIENTID_CONFIRM4res *res =
8212             &resop->nfs_resop4_u.opsetclientid_confirm;
8213         rfs4_client_t *cp, *cptoclose = NULL;
8214         nfs4_srv_t *nsrv4;
8215 
8216         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8217             struct compound_state *, cs,
8218             SETCLIENTID_CONFIRM4args *, args);
8219 
8220         nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8221         *cs->statusp = res->status = NFS4_OK;
8222 
8223         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8224 
8225         if (cp == NULL) {
8226                 *cs->statusp = res->status =
8227                     rfs4_check_clientid(&args->clientid, 1);
8228                 goto out;
8229         }
8230 
8231         if (!creds_ok(cp, req, cs)) {
8232                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8233                 rfs4_client_rele(cp);
8234                 goto out;
8235         }
8236 
8237         /* If the verifier doesn't match, the record doesn't match */
8238         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8239                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8240                 rfs4_client_rele(cp);
8241                 goto out;
8242         }
8243 
8244         rfs4_dbe_lock(cp->rc_dbe);
8245         cp->rc_need_confirm = FALSE;
8246         if (cp->rc_cp_confirmed) {
8247                 cptoclose = cp->rc_cp_confirmed;
8248                 cptoclose->rc_ss_remove = 1;
8249                 cp->rc_cp_confirmed = NULL;
8250         }
8251 
8252         /*
8253          * Update the client's associated server instance, if it's changed
8254          * since the client was created.
8255          */
8256         if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8257                 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8258 
8259         /*
8260          * Record clientid in stable storage.
8261          * Must be done after server instance has been assigned.
8262          */
8263         rfs4_ss_clid(nsrv4, cp);
8264 
8265         rfs4_dbe_unlock(cp->rc_dbe);
8266 
8267         if (cptoclose)
8268                 /* don't need to rele, client_close does it */
8269                 rfs4_client_close(cptoclose);
8270 
8271         /* If needed, initiate CB_NULL call for callback path */
8272         rfs4_deleg_cb_check(cp);
8273         rfs4_update_lease(cp);
8274 
8275         /*
8276          * Check to see if client can perform reclaims
8277          */
8278         rfs4_ss_chkclid(nsrv4, cp);
8279 
8280         rfs4_client_rele(cp);
8281 
8282 out:
8283         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8284             struct compound_state *, cs,
8285             SETCLIENTID_CONFIRM4 *, res);
8286 }
8287 
8288 
8289 /*ARGSUSED*/
8290 void
8291 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8292     struct svc_req *req, struct compound_state *cs)
8293 {
8294         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8295         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8296         rfs4_state_t *sp;
8297         nfsstat4 status;
8298 
8299         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8300             CLOSE4args *, args);
8301 
8302         if (cs->vp == NULL) {
8303                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8304                 goto out;
8305         }
8306 
8307         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8308         if (status != NFS4_OK) {
8309                 *cs->statusp = resp->status = status;
8310                 goto out;
8311         }
8312 
8313         /* Ensure specified filehandle matches */
8314         if (cs->vp != sp->rs_finfo->rf_vp) {
8315                 rfs4_state_rele(sp);
8316                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8317                 goto out;
8318         }
8319 
8320         /* hold off other access to open_owner while we tinker */
8321         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8322 
8323         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8324         case NFS4_CHECK_STATEID_OKAY:
8325                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8326                     resop) != NFS4_CHKSEQ_OKAY) {
8327                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8328                         goto end;
8329                 }
8330                 break;
8331         case NFS4_CHECK_STATEID_OLD:
8332                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8333                 goto end;
8334         case NFS4_CHECK_STATEID_BAD:
8335                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8336                 goto end;
8337         case NFS4_CHECK_STATEID_EXPIRED:
8338                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8339                 goto end;
8340         case NFS4_CHECK_STATEID_CLOSED:
8341                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8342                 goto end;
8343         case NFS4_CHECK_STATEID_UNCONFIRMED:
8344                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8345                 goto end;
8346         case NFS4_CHECK_STATEID_REPLAY:
8347                 /* Check the sequence id for the open owner */
8348                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8349                     resop)) {
8350                 case NFS4_CHKSEQ_OKAY:
8351                         /*
8352                          * This is replayed stateid; if seqid matches
8353                          * next expected, then client is using wrong seqid.
8354                          */
8355                         /* FALL THROUGH */
8356                 case NFS4_CHKSEQ_BAD:
8357                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8358                         goto end;
8359                 case NFS4_CHKSEQ_REPLAY:
8360                         /*
8361                          * Note this case is the duplicate case so
8362                          * resp->status is already set.
8363                          */
8364                         *cs->statusp = resp->status;
8365                         rfs4_update_lease(sp->rs_owner->ro_client);
8366                         goto end;
8367                 }
8368                 break;
8369         default:
8370                 ASSERT(FALSE);
8371                 break;
8372         }
8373 
8374         rfs4_dbe_lock(sp->rs_dbe);
8375 
8376         /* Update the stateid. */
8377         next_stateid(&sp->rs_stateid);
8378         resp->open_stateid = sp->rs_stateid.stateid;
8379 
8380         rfs4_dbe_unlock(sp->rs_dbe);
8381 
8382         rfs4_update_lease(sp->rs_owner->ro_client);
8383         rfs4_update_open_sequence(sp->rs_owner);
8384         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8385 
8386         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8387 
8388         *cs->statusp = resp->status = status;
8389 
8390 end:
8391         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8392         rfs4_state_rele(sp);
8393 out:
8394         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8395             CLOSE4res *, resp);
8396 }
8397 
8398 /*
8399  * Manage the counts on the file struct and close all file locks
8400  */
8401 /*ARGSUSED*/
8402 void
8403 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8404     bool_t close_of_client)
8405 {
8406         rfs4_file_t *fp = sp->rs_finfo;
8407         rfs4_lo_state_t *lsp;
8408         int fflags = 0;
8409 
8410         /*
8411          * If this call is part of the larger closing down of client
8412          * state then it is just easier to release all locks
8413          * associated with this client instead of going through each
8414          * individual file and cleaning locks there.
8415          */
8416         if (close_of_client) {
8417                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8418                     !list_is_empty(&sp->rs_lostatelist) &&
8419                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8420                         /* Is the PxFS kernel module loaded? */
8421                         if (lm_remove_file_locks != NULL) {
8422                                 int new_sysid;
8423 
8424                                 /* Encode the cluster nodeid in new sysid */
8425                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8426                                 lm_set_nlmid_flk(&new_sysid);
8427 
8428                                 /*
8429                                  * This PxFS routine removes file locks for a
8430                                  * client over all nodes of a cluster.
8431                                  */
8432                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8433                                     "lm_remove_file_locks(sysid=0x%x)\n",
8434                                     new_sysid));
8435                                 (*lm_remove_file_locks)(new_sysid);
8436                         } else {
8437                                 struct flock64 flk;
8438 
8439                                 /* Release all locks for this client */
8440                                 flk.l_type = F_UNLKSYS;
8441                                 flk.l_whence = 0;
8442                                 flk.l_start = 0;
8443                                 flk.l_len = 0;
8444                                 flk.l_sysid =
8445                                     sp->rs_owner->ro_client->rc_sysidt;
8446                                 flk.l_pid = 0;
8447                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8448                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8449                                     (u_offset_t)0, NULL, CRED(), NULL);
8450                         }
8451 
8452                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8453                 }
8454         }
8455 
8456         /*
8457          * Release all locks on this file by this lock owner or at
8458          * least mark the locks as having been released
8459          */
8460         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8461             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8462                 lsp->rls_locks_cleaned = TRUE;
8463 
8464                 /* Was this already taken care of above? */
8465                 if (!close_of_client &&
8466                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8467                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8468                             lsp->rls_locker->rl_pid,
8469                             lsp->rls_locker->rl_client->rc_sysidt);
8470         }
8471 
8472         /*
8473          * Release any shrlocks associated with this open state ID.
8474          * This must be done before the rfs4_state gets marked closed.
8475          */
8476         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8477                 (void) rfs4_unshare(sp);
8478 
8479         if (sp->rs_open_access) {
8480                 rfs4_dbe_lock(fp->rf_dbe);
8481 
8482                 /*
8483                  * Decrement the count for each access and deny bit that this
8484                  * state has contributed to the file.
8485                  * If the file counts go to zero
8486                  * clear the appropriate bit in the appropriate mask.
8487                  */
8488                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8489                         fp->rf_access_read--;
8490                         fflags |= FREAD;
8491                         if (fp->rf_access_read == 0)
8492                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8493                 }
8494                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8495                         fp->rf_access_write--;
8496                         fflags |= FWRITE;
8497                         if (fp->rf_access_write == 0)
8498                                 fp->rf_share_access &=
8499                                     ~OPEN4_SHARE_ACCESS_WRITE;
8500                 }
8501                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8502                         fp->rf_deny_read--;
8503                         if (fp->rf_deny_read == 0)
8504                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8505                 }
8506                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8507                         fp->rf_deny_write--;
8508                         if (fp->rf_deny_write == 0)
8509                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8510                 }
8511 
8512                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8513 
8514                 rfs4_dbe_unlock(fp->rf_dbe);
8515 
8516                 sp->rs_open_access = 0;
8517                 sp->rs_open_deny = 0;
8518         }
8519 }
8520 
8521 /*
8522  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8523  */
8524 static nfsstat4
8525 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8526 {
8527         rfs4_lockowner_t *lo;
8528         rfs4_client_t *cp;
8529         uint32_t len;
8530 
8531         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8532         if (lo != NULL) {
8533                 cp = lo->rl_client;
8534                 if (rfs4_lease_expired(cp)) {
8535                         rfs4_lockowner_rele(lo);
8536                         rfs4_dbe_hold(cp->rc_dbe);
8537                         rfs4_client_close(cp);
8538                         return (NFS4ERR_EXPIRED);
8539                 }
8540                 dp->owner.clientid = lo->rl_owner.clientid;
8541                 len = lo->rl_owner.owner_len;
8542                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8543                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8544                 dp->owner.owner_len = len;
8545                 rfs4_lockowner_rele(lo);
8546                 goto finish;
8547         }
8548 
8549         /*
8550          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8551          * of the client id contain the boot time for a NFS4 lock. So we
8552          * fabricate and identity by setting clientid to the sysid, and
8553          * the lock owner to the pid.
8554          */
8555         dp->owner.clientid = flk->l_sysid;
8556         len = sizeof (pid_t);
8557         dp->owner.owner_len = len;
8558         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8559         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8560 finish:
8561         dp->offset = flk->l_start;
8562         dp->length = flk->l_len;
8563 
8564         if (flk->l_type == F_RDLCK)
8565                 dp->locktype = READ_LT;
8566         else if (flk->l_type == F_WRLCK)
8567                 dp->locktype = WRITE_LT;
8568         else
8569                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8570 
8571         return (NFS4_OK);
8572 }
8573 
8574 /*
8575  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8576  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8577  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8578  * for that (obviously); they are sending the LOCK requests with some delays
8579  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8580  * locking and delay implementation at the client side.
8581  *
8582  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8583  * fast retries on its own (the for loop below) in a hope the lock will be
8584  * available soon.  And if not, the client won't need to resend the LOCK
8585  * requests so fast to check the lock availability.  This basically saves some
8586  * network traffic and tries to make sure the client gets the lock ASAP.
8587  */
8588 static int
8589 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8590 {
8591         int error;
8592         struct flock64 flk;
8593         int i;
8594         clock_t delaytime;
8595         int cmd;
8596         int spin_cnt = 0;
8597 
8598         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8599 retry:
8600         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8601 
8602         for (i = 0; i < rfs4_maxlock_tries; i++) {
8603                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8604                 error = VOP_FRLOCK(vp, cmd,
8605                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8606 
8607                 if (error != EAGAIN && error != EACCES)
8608                         break;
8609 
8610                 if (i < rfs4_maxlock_tries - 1) {
8611                         delay(delaytime);
8612                         delaytime *= 2;
8613                 }
8614         }
8615 
8616         if (error == EAGAIN || error == EACCES) {
8617                 /* Get the owner of the lock */
8618                 flk = *flock;
8619                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8620                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8621                     NULL) == 0) {
8622                         /*
8623                          * There's a race inherent in the current VOP_FRLOCK
8624                          * design where:
8625                          * a: "other guy" takes a lock that conflicts with a
8626                          * lock we want
8627                          * b: we attempt to take our lock (non-blocking) and
8628                          * the attempt fails.
8629                          * c: "other guy" releases the conflicting lock
8630                          * d: we ask what lock conflicts with the lock we want,
8631                          * getting F_UNLCK (no lock blocks us)
8632                          *
8633                          * If we retry the non-blocking lock attempt in this
8634                          * case (restart at step 'b') there's some possibility
8635                          * that many such attempts might fail.  However a test
8636                          * designed to actually provoke this race shows that
8637                          * the vast majority of cases require no retry, and
8638                          * only a few took as many as three retries.  Here's
8639                          * the test outcome:
8640                          *
8641                          *         number of retries    how many times we needed
8642                          *                              that many retries
8643                          *         0                    79461
8644                          *         1                      862
8645                          *         2                       49
8646                          *         3                        5
8647                          *
8648                          * Given those empirical results, we arbitrarily limit
8649                          * the retry count to ten.
8650                          *
8651                          * If we actually make to ten retries and give up,
8652                          * nothing catastrophic happens, but we're unable to
8653                          * return the information about the conflicting lock to
8654                          * the NFS client.  That's an acceptable trade off vs.
8655                          * letting this retry loop run forever.
8656                          */
8657                         if (flk.l_type == F_UNLCK) {
8658                                 if (spin_cnt++ < 10) {
8659                                         /* No longer locked, retry */
8660                                         goto retry;
8661                                 }
8662                         } else {
8663                                 *flock = flk;
8664                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8665                                     F_GETLK, &flk);
8666                         }
8667                 }
8668         }
8669 
8670         return (error);
8671 }
8672 
8673 /*ARGSUSED*/
8674 static nfsstat4
8675 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8676     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8677 {
8678         nfsstat4 status;
8679         rfs4_lockowner_t *lo = lsp->rls_locker;
8680         rfs4_state_t *sp = lsp->rls_state;
8681         struct flock64 flock;
8682         int16_t ltype;
8683         int flag;
8684         int error;
8685         sysid_t sysid;
8686         LOCK4res *lres;
8687         vnode_t *vp;
8688 
8689         if (rfs4_lease_expired(lo->rl_client)) {
8690                 return (NFS4ERR_EXPIRED);
8691         }
8692 
8693         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8694                 return (status);
8695 
8696         /* Check for zero length. To lock to end of file use all ones for V4 */
8697         if (length == 0)
8698                 return (NFS4ERR_INVAL);
8699         else if (length == (length4)(~0))
8700                 length = 0;             /* Posix to end of file  */
8701 
8702 retry:
8703         rfs4_dbe_lock(sp->rs_dbe);
8704         if (sp->rs_closed == TRUE) {
8705                 rfs4_dbe_unlock(sp->rs_dbe);
8706                 return (NFS4ERR_OLD_STATEID);
8707         }
8708 
8709         if (resop->resop != OP_LOCKU) {
8710                 switch (locktype) {
8711                 case READ_LT:
8712                 case READW_LT:
8713                         if ((sp->rs_share_access
8714                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8715                                 rfs4_dbe_unlock(sp->rs_dbe);
8716 
8717                                 return (NFS4ERR_OPENMODE);
8718                         }
8719                         ltype = F_RDLCK;
8720                         break;
8721                 case WRITE_LT:
8722                 case WRITEW_LT:
8723                         if ((sp->rs_share_access
8724                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8725                                 rfs4_dbe_unlock(sp->rs_dbe);
8726 
8727                                 return (NFS4ERR_OPENMODE);
8728                         }
8729                         ltype = F_WRLCK;
8730                         break;
8731                 }
8732         } else
8733                 ltype = F_UNLCK;
8734 
8735         flock.l_type = ltype;
8736         flock.l_whence = 0;             /* SEEK_SET */
8737         flock.l_start = offset;
8738         flock.l_len = length;
8739         flock.l_sysid = sysid;
8740         flock.l_pid = lsp->rls_locker->rl_pid;
8741 
8742         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8743         if (flock.l_len < 0 || flock.l_start < 0) {
8744                 rfs4_dbe_unlock(sp->rs_dbe);
8745                 return (NFS4ERR_INVAL);
8746         }
8747 
8748         /*
8749          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8750          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8751          */
8752         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8753 
8754         vp = sp->rs_finfo->rf_vp;
8755         VN_HOLD(vp);
8756 
8757         /*
8758          * We need to unlock sp before we call the underlying filesystem to
8759          * acquire the file lock.
8760          */
8761         rfs4_dbe_unlock(sp->rs_dbe);
8762 
8763         error = setlock(vp, &flock, flag, cred);
8764 
8765         /*
8766          * Make sure the file is still open.  In a case the file was closed in
8767          * the meantime, clean the lock we acquired using the setlock() call
8768          * above, and return the appropriate error.
8769          */
8770         rfs4_dbe_lock(sp->rs_dbe);
8771         if (sp->rs_closed == TRUE) {
8772                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8773                 rfs4_dbe_unlock(sp->rs_dbe);
8774 
8775                 VN_RELE(vp);
8776 
8777                 return (NFS4ERR_OLD_STATEID);
8778         }
8779         rfs4_dbe_unlock(sp->rs_dbe);
8780 
8781         VN_RELE(vp);
8782 
8783         if (error == 0) {
8784                 rfs4_dbe_lock(lsp->rls_dbe);
8785                 next_stateid(&lsp->rls_lockid);
8786                 rfs4_dbe_unlock(lsp->rls_dbe);
8787         }
8788 
8789         /*
8790          * N.B. We map error values to nfsv4 errors. This is differrent
8791          * than puterrno4 routine.
8792          */
8793         switch (error) {
8794         case 0:
8795                 status = NFS4_OK;
8796                 break;
8797         case EAGAIN:
8798         case EACCES:            /* Old value */
8799                 /* Can only get here if op is OP_LOCK */
8800                 ASSERT(resop->resop == OP_LOCK);
8801                 lres = &resop->nfs_resop4_u.oplock;
8802                 status = NFS4ERR_DENIED;
8803                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8804                     == NFS4ERR_EXPIRED)
8805                         goto retry;
8806                 break;
8807         case ENOLCK:
8808                 status = NFS4ERR_DELAY;
8809                 break;
8810         case EOVERFLOW:
8811                 status = NFS4ERR_INVAL;
8812                 break;
8813         case EINVAL:
8814                 status = NFS4ERR_NOTSUPP;
8815                 break;
8816         default:
8817                 status = NFS4ERR_SERVERFAULT;
8818                 break;
8819         }
8820 
8821         return (status);
8822 }
8823 
8824 /*ARGSUSED*/
8825 void
8826 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8827     struct svc_req *req, struct compound_state *cs)
8828 {
8829         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8830         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8831         nfsstat4 status;
8832         stateid4 *stateid;
8833         rfs4_lockowner_t *lo;
8834         rfs4_client_t *cp;
8835         rfs4_state_t *sp = NULL;
8836         rfs4_lo_state_t *lsp = NULL;
8837         bool_t ls_sw_held = FALSE;
8838         bool_t create = TRUE;
8839         bool_t lcreate = TRUE;
8840         bool_t dup_lock = FALSE;
8841         int rc;
8842 
8843         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8844             LOCK4args *, args);
8845 
8846         if (cs->vp == NULL) {
8847                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8848                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8849                     cs, LOCK4res *, resp);
8850                 return;
8851         }
8852 
8853         if (args->locker.new_lock_owner) {
8854                 /* Create a new lockowner for this instance */
8855                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8856 
8857                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8858 
8859                 stateid = &olo->open_stateid;
8860                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8861                 if (status != NFS4_OK) {
8862                         NFS4_DEBUG(rfs4_debug,
8863                             (CE_NOTE, "Get state failed in lock %d", status));
8864                         *cs->statusp = resp->status = status;
8865                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8866                             cs, LOCK4res *, resp);
8867                         return;
8868                 }
8869 
8870                 /* Ensure specified filehandle matches */
8871                 if (cs->vp != sp->rs_finfo->rf_vp) {
8872                         rfs4_state_rele(sp);
8873                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8874                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8875                             cs, LOCK4res *, resp);
8876                         return;
8877                 }
8878 
8879                 /* hold off other access to open_owner while we tinker */
8880                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8881 
8882                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8883                 case NFS4_CHECK_STATEID_OLD:
8884                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8885                         goto end;
8886                 case NFS4_CHECK_STATEID_BAD:
8887                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8888                         goto end;
8889                 case NFS4_CHECK_STATEID_EXPIRED:
8890                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8891                         goto end;
8892                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8893                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8894                         goto end;
8895                 case NFS4_CHECK_STATEID_CLOSED:
8896                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8897                         goto end;
8898                 case NFS4_CHECK_STATEID_OKAY:
8899                 case NFS4_CHECK_STATEID_REPLAY:
8900                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8901                             sp->rs_owner, resop)) {
8902                         case NFS4_CHKSEQ_OKAY:
8903                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8904                                         break;
8905                                 /*
8906                                  * This is replayed stateid; if seqid
8907                                  * matches next expected, then client
8908                                  * is using wrong seqid.
8909                                  */
8910                                 /* FALLTHROUGH */
8911                         case NFS4_CHKSEQ_BAD:
8912                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8913                                 goto end;
8914                         case NFS4_CHKSEQ_REPLAY:
8915                                 /* This is a duplicate LOCK request */
8916                                 dup_lock = TRUE;
8917 
8918                                 /*
8919                                  * For a duplicate we do not want to
8920                                  * create a new lockowner as it should
8921                                  * already exist.
8922                                  * Turn off the lockowner create flag.
8923                                  */
8924                                 lcreate = FALSE;
8925                         }
8926                         break;
8927                 }
8928 
8929                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8930                 if (lo == NULL) {
8931                         NFS4_DEBUG(rfs4_debug,
8932                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8933                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8934                         goto end;
8935                 }
8936 
8937                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8938                 if (lsp == NULL) {
8939                         rfs4_update_lease(sp->rs_owner->ro_client);
8940                         /*
8941                          * Only update the open_seqid if this is not
8942                          * a duplicate request
8943                          */
8944                         if (dup_lock == FALSE) {
8945                                 rfs4_update_open_sequence(sp->rs_owner);
8946                         }
8947 
8948                         NFS4_DEBUG(rfs4_debug,
8949                             (CE_NOTE, "rfs4_op_lock: no state"));
8950                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8951                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8952                         rfs4_lockowner_rele(lo);
8953                         goto end;
8954                 }
8955 
8956                 /*
8957                  * This is the new_lock_owner branch and the client is
8958                  * supposed to be associating a new lock_owner with
8959                  * the open file at this point.  If we find that a
8960                  * lock_owner/state association already exists and a
8961                  * successful LOCK request was returned to the client,
8962                  * an error is returned to the client since this is
8963                  * not appropriate.  The client should be using the
8964                  * existing lock_owner branch.
8965                  */
8966                 if (dup_lock == FALSE && create == FALSE) {
8967                         if (lsp->rls_lock_completed == TRUE) {
8968                                 *cs->statusp =
8969                                     resp->status = NFS4ERR_BAD_SEQID;
8970                                 rfs4_lockowner_rele(lo);
8971                                 goto end;
8972                         }
8973                 }
8974 
8975                 rfs4_update_lease(sp->rs_owner->ro_client);
8976 
8977                 /*
8978                  * Only update the open_seqid if this is not
8979                  * a duplicate request
8980                  */
8981                 if (dup_lock == FALSE) {
8982                         rfs4_update_open_sequence(sp->rs_owner);
8983                 }
8984 
8985                 /*
8986                  * If this is a duplicate lock request, just copy the
8987                  * previously saved reply and return.
8988                  */
8989                 if (dup_lock == TRUE) {
8990                         /* verify that lock_seqid's match */
8991                         if (lsp->rls_seqid != olo->lock_seqid) {
8992                                 NFS4_DEBUG(rfs4_debug,
8993                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8994                                     "lsp->seqid=%d old->seqid=%d",
8995                                     lsp->rls_seqid, olo->lock_seqid));
8996                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8997                         } else {
8998                                 rfs4_copy_reply(resop, &lsp->rls_reply);
8999                                 /*
9000                                  * Make sure to copy the just
9001                                  * retrieved reply status into the
9002                                  * overall compound status
9003                                  */
9004                                 *cs->statusp = resp->status;
9005                         }
9006                         rfs4_lockowner_rele(lo);
9007                         goto end;
9008                 }
9009 
9010                 rfs4_dbe_lock(lsp->rls_dbe);
9011 
9012                 /* Make sure to update the lock sequence id */
9013                 lsp->rls_seqid = olo->lock_seqid;
9014 
9015                 NFS4_DEBUG(rfs4_debug,
9016                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9017 
9018                 /*
9019                  * This is used to signify the newly created lockowner
9020                  * stateid and its sequence number.  The checks for
9021                  * sequence number and increment don't occur on the
9022                  * very first lock request for a lockowner.
9023                  */
9024                 lsp->rls_skip_seqid_check = TRUE;
9025 
9026                 /* hold off other access to lsp while we tinker */
9027                 rfs4_sw_enter(&lsp->rls_sw);
9028                 ls_sw_held = TRUE;
9029 
9030                 rfs4_dbe_unlock(lsp->rls_dbe);
9031 
9032                 rfs4_lockowner_rele(lo);
9033         } else {
9034                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9035                 /* get lsp and hold the lock on the underlying file struct */
9036                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9037                     != NFS4_OK) {
9038                         *cs->statusp = resp->status = status;
9039                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9040                             cs, LOCK4res *, resp);
9041                         return;
9042                 }
9043                 create = FALSE; /* We didn't create lsp */
9044 
9045                 /* Ensure specified filehandle matches */
9046                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9047                         rfs4_lo_state_rele(lsp, TRUE);
9048                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9049                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9050                             cs, LOCK4res *, resp);
9051                         return;
9052                 }
9053 
9054                 /* hold off other access to lsp while we tinker */
9055                 rfs4_sw_enter(&lsp->rls_sw);
9056                 ls_sw_held = TRUE;
9057 
9058                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9059                 /*
9060                  * The stateid looks like it was okay (expected to be
9061                  * the next one)
9062                  */
9063                 case NFS4_CHECK_STATEID_OKAY:
9064                         /*
9065                          * The sequence id is now checked.  Determine
9066                          * if this is a replay or if it is in the
9067                          * expected (next) sequence.  In the case of a
9068                          * replay, there are two replay conditions
9069                          * that may occur.  The first is the normal
9070                          * condition where a LOCK is done with a
9071                          * NFS4_OK response and the stateid is
9072                          * updated.  That case is handled below when
9073                          * the stateid is identified as a REPLAY.  The
9074                          * second is the case where an error is
9075                          * returned, like NFS4ERR_DENIED, and the
9076                          * sequence number is updated but the stateid
9077                          * is not updated.  This second case is dealt
9078                          * with here.  So it may seem odd that the
9079                          * stateid is okay but the sequence id is a
9080                          * replay but it is okay.
9081                          */
9082                         switch (rfs4_check_lock_seqid(
9083                             args->locker.locker4_u.lock_owner.lock_seqid,
9084                             lsp, resop)) {
9085                         case NFS4_CHKSEQ_REPLAY:
9086                                 if (resp->status != NFS4_OK) {
9087                                         /*
9088                                          * Here is our replay and need
9089                                          * to verify that the last
9090                                          * response was an error.
9091                                          */
9092                                         *cs->statusp = resp->status;
9093                                         goto end;
9094                                 }
9095                                 /*
9096                                  * This is done since the sequence id
9097                                  * looked like a replay but it didn't
9098                                  * pass our check so a BAD_SEQID is
9099                                  * returned as a result.
9100                                  */
9101                                 /*FALLTHROUGH*/
9102                         case NFS4_CHKSEQ_BAD:
9103                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9104                                 goto end;
9105                         case NFS4_CHKSEQ_OKAY:
9106                                 /* Everything looks okay move ahead */
9107                                 break;
9108                         }
9109                         break;
9110                 case NFS4_CHECK_STATEID_OLD:
9111                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9112                         goto end;
9113                 case NFS4_CHECK_STATEID_BAD:
9114                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9115                         goto end;
9116                 case NFS4_CHECK_STATEID_EXPIRED:
9117                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9118                         goto end;
9119                 case NFS4_CHECK_STATEID_CLOSED:
9120                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9121                         goto end;
9122                 case NFS4_CHECK_STATEID_REPLAY:
9123                         switch (rfs4_check_lock_seqid(
9124                             args->locker.locker4_u.lock_owner.lock_seqid,
9125                             lsp, resop)) {
9126                         case NFS4_CHKSEQ_OKAY:
9127                                 /*
9128                                  * This is a replayed stateid; if
9129                                  * seqid matches the next expected,
9130                                  * then client is using wrong seqid.
9131                                  */
9132                         case NFS4_CHKSEQ_BAD:
9133                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9134                                 goto end;
9135                         case NFS4_CHKSEQ_REPLAY:
9136                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9137                                 *cs->statusp = status = resp->status;
9138                                 goto end;
9139                         }
9140                         break;
9141                 default:
9142                         ASSERT(FALSE);
9143                         break;
9144                 }
9145 
9146                 rfs4_update_lock_sequence(lsp);
9147                 rfs4_update_lease(lsp->rls_locker->rl_client);
9148         }
9149 
9150         /*
9151          * NFS4 only allows locking on regular files, so
9152          * verify type of object.
9153          */
9154         if (cs->vp->v_type != VREG) {
9155                 if (cs->vp->v_type == VDIR)
9156                         status = NFS4ERR_ISDIR;
9157                 else
9158                         status = NFS4ERR_INVAL;
9159                 goto out;
9160         }
9161 
9162         cp = lsp->rls_state->rs_owner->ro_client;
9163 
9164         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9165                 status = NFS4ERR_GRACE;
9166                 goto out;
9167         }
9168 
9169         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9170                 status = NFS4ERR_NO_GRACE;
9171                 goto out;
9172         }
9173 
9174         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9175                 status = NFS4ERR_NO_GRACE;
9176                 goto out;
9177         }
9178 
9179         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9180                 cs->deleg = TRUE;
9181 
9182         status = rfs4_do_lock(lsp, args->locktype,
9183             args->offset, args->length, cs->cr, resop);
9184 
9185 out:
9186         lsp->rls_skip_seqid_check = FALSE;
9187 
9188         *cs->statusp = resp->status = status;
9189 
9190         if (status == NFS4_OK) {
9191                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9192                 lsp->rls_lock_completed = TRUE;
9193         }
9194         /*
9195          * Only update the "OPEN" response here if this was a new
9196          * lock_owner
9197          */
9198         if (sp)
9199                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9200 
9201         rfs4_update_lock_resp(lsp, resop);
9202 
9203 end:
9204         if (lsp) {
9205                 if (ls_sw_held)
9206                         rfs4_sw_exit(&lsp->rls_sw);
9207                 /*
9208                  * If an sp obtained, then the lsp does not represent
9209                  * a lock on the file struct.
9210                  */
9211                 if (sp != NULL)
9212                         rfs4_lo_state_rele(lsp, FALSE);
9213                 else
9214                         rfs4_lo_state_rele(lsp, TRUE);
9215         }
9216         if (sp) {
9217                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9218                 rfs4_state_rele(sp);
9219         }
9220 
9221         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9222             LOCK4res *, resp);
9223 }
9224 
9225 /* free function for LOCK/LOCKT */
9226 static void
9227 lock_denied_free(nfs_resop4 *resop)
9228 {
9229         LOCK4denied *dp = NULL;
9230 
9231         switch (resop->resop) {
9232         case OP_LOCK:
9233                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9234                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9235                 break;
9236         case OP_LOCKT:
9237                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9238                         dp = &resop->nfs_resop4_u.oplockt.denied;
9239                 break;
9240         default:
9241                 break;
9242         }
9243 
9244         if (dp)
9245                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9246 }
9247 
9248 /*ARGSUSED*/
9249 void
9250 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9251     struct svc_req *req, struct compound_state *cs)
9252 {
9253         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9254         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9255         nfsstat4 status;
9256         stateid4 *stateid = &args->lock_stateid;
9257         rfs4_lo_state_t *lsp;
9258 
9259         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9260             LOCKU4args *, args);
9261 
9262         if (cs->vp == NULL) {
9263                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9264                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9265                     LOCKU4res *, resp);
9266                 return;
9267         }
9268 
9269         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9270                 *cs->statusp = resp->status = status;
9271                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9272                     LOCKU4res *, resp);
9273                 return;
9274         }
9275 
9276         /* Ensure specified filehandle matches */
9277         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9278                 rfs4_lo_state_rele(lsp, TRUE);
9279                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9280                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9281                     LOCKU4res *, resp);
9282                 return;
9283         }
9284 
9285         /* hold off other access to lsp while we tinker */
9286         rfs4_sw_enter(&lsp->rls_sw);
9287 
9288         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9289         case NFS4_CHECK_STATEID_OKAY:
9290                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9291                     != NFS4_CHKSEQ_OKAY) {
9292                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9293                         goto end;
9294                 }
9295                 break;
9296         case NFS4_CHECK_STATEID_OLD:
9297                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9298                 goto end;
9299         case NFS4_CHECK_STATEID_BAD:
9300                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9301                 goto end;
9302         case NFS4_CHECK_STATEID_EXPIRED:
9303                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9304                 goto end;
9305         case NFS4_CHECK_STATEID_CLOSED:
9306                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9307                 goto end;
9308         case NFS4_CHECK_STATEID_REPLAY:
9309                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9310                 case NFS4_CHKSEQ_OKAY:
9311                                 /*
9312                                  * This is a replayed stateid; if
9313                                  * seqid matches the next expected,
9314                                  * then client is using wrong seqid.
9315                                  */
9316                 case NFS4_CHKSEQ_BAD:
9317                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9318                         goto end;
9319                 case NFS4_CHKSEQ_REPLAY:
9320                         rfs4_update_lease(lsp->rls_locker->rl_client);
9321                         *cs->statusp = status = resp->status;
9322                         goto end;
9323                 }
9324                 break;
9325         default:
9326                 ASSERT(FALSE);
9327                 break;
9328         }
9329 
9330         rfs4_update_lock_sequence(lsp);
9331         rfs4_update_lease(lsp->rls_locker->rl_client);
9332 
9333         /*
9334          * NFS4 only allows locking on regular files, so
9335          * verify type of object.
9336          */
9337         if (cs->vp->v_type != VREG) {
9338                 if (cs->vp->v_type == VDIR)
9339                         status = NFS4ERR_ISDIR;
9340                 else
9341                         status = NFS4ERR_INVAL;
9342                 goto out;
9343         }
9344 
9345         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9346                 status = NFS4ERR_GRACE;
9347                 goto out;
9348         }
9349 
9350         status = rfs4_do_lock(lsp, args->locktype,
9351             args->offset, args->length, cs->cr, resop);
9352 
9353 out:
9354         *cs->statusp = resp->status = status;
9355 
9356         if (status == NFS4_OK)
9357                 resp->lock_stateid = lsp->rls_lockid.stateid;
9358 
9359         rfs4_update_lock_resp(lsp, resop);
9360 
9361 end:
9362         rfs4_sw_exit(&lsp->rls_sw);
9363         rfs4_lo_state_rele(lsp, TRUE);
9364 
9365         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9366             LOCKU4res *, resp);
9367 }
9368 
9369 /*
9370  * LOCKT is a best effort routine, the client can not be guaranteed that
9371  * the status return is still in effect by the time the reply is received.
9372  * They are numerous race conditions in this routine, but we are not required
9373  * and can not be accurate.
9374  */
9375 /*ARGSUSED*/
9376 void
9377 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9378     struct svc_req *req, struct compound_state *cs)
9379 {
9380         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9381         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9382         rfs4_lockowner_t *lo;
9383         rfs4_client_t *cp;
9384         bool_t create = FALSE;
9385         struct flock64 flk;
9386         int error;
9387         int flag = FREAD | FWRITE;
9388         int ltype;
9389         length4 posix_length;
9390         sysid_t sysid;
9391         pid_t pid;
9392 
9393         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9394             LOCKT4args *, args);
9395 
9396         if (cs->vp == NULL) {
9397                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9398                 goto out;
9399         }
9400 
9401         /*
9402          * NFS4 only allows locking on regular files, so
9403          * verify type of object.
9404          */
9405         if (cs->vp->v_type != VREG) {
9406                 if (cs->vp->v_type == VDIR)
9407                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9408                 else
9409                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9410                 goto out;
9411         }
9412 
9413         /*
9414          * Check out the clientid to ensure the server knows about it
9415          * so that we correctly inform the client of a server reboot.
9416          */
9417         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9418             == NULL) {
9419                 *cs->statusp = resp->status =
9420                     rfs4_check_clientid(&args->owner.clientid, 0);
9421                 goto out;
9422         }
9423         if (rfs4_lease_expired(cp)) {
9424                 rfs4_client_close(cp);
9425                 /*
9426                  * Protocol doesn't allow returning NFS4ERR_STALE as
9427                  * other operations do on this check so STALE_CLIENTID
9428                  * is returned instead
9429                  */
9430                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9431                 goto out;
9432         }
9433 
9434         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9435                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9436                 rfs4_client_rele(cp);
9437                 goto out;
9438         }
9439         rfs4_client_rele(cp);
9440 
9441         resp->status = NFS4_OK;
9442 
9443         switch (args->locktype) {
9444         case READ_LT:
9445         case READW_LT:
9446                 ltype = F_RDLCK;
9447                 break;
9448         case WRITE_LT:
9449         case WRITEW_LT:
9450                 ltype = F_WRLCK;
9451                 break;
9452         }
9453 
9454         posix_length = args->length;
9455         /* Check for zero length. To lock to end of file use all ones for V4 */
9456         if (posix_length == 0) {
9457                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9458                 goto out;
9459         } else if (posix_length == (length4)(~0)) {
9460                 posix_length = 0;       /* Posix to end of file  */
9461         }
9462 
9463         /* Find or create a lockowner */
9464         lo = rfs4_findlockowner(&args->owner, &create);
9465 
9466         if (lo) {
9467                 pid = lo->rl_pid;
9468                 if ((resp->status =
9469                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9470                         goto err;
9471         } else {
9472                 pid = 0;
9473                 sysid = lockt_sysid;
9474         }
9475 retry:
9476         flk.l_type = ltype;
9477         flk.l_whence = 0;               /* SEEK_SET */
9478         flk.l_start = args->offset;
9479         flk.l_len = posix_length;
9480         flk.l_sysid = sysid;
9481         flk.l_pid = pid;
9482         flag |= F_REMOTELOCK;
9483 
9484         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9485 
9486         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9487         if (flk.l_len < 0 || flk.l_start < 0) {
9488                 resp->status = NFS4ERR_INVAL;
9489                 goto err;
9490         }
9491         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9492             NULL, cs->cr, NULL);
9493 
9494         /*
9495          * N.B. We map error values to nfsv4 errors. This is differrent
9496          * than puterrno4 routine.
9497          */
9498         switch (error) {
9499         case 0:
9500                 if (flk.l_type == F_UNLCK)
9501                         resp->status = NFS4_OK;
9502                 else {
9503                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9504                                 goto retry;
9505                         resp->status = NFS4ERR_DENIED;
9506                 }
9507                 break;
9508         case EOVERFLOW:
9509                 resp->status = NFS4ERR_INVAL;
9510                 break;
9511         case EINVAL:
9512                 resp->status = NFS4ERR_NOTSUPP;
9513                 break;
9514         default:
9515                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9516                     error);
9517                 resp->status = NFS4ERR_SERVERFAULT;
9518                 break;
9519         }
9520 
9521 err:
9522         if (lo)
9523                 rfs4_lockowner_rele(lo);
9524         *cs->statusp = resp->status;
9525 out:
9526         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9527             LOCKT4res *, resp);
9528 }
9529 
9530 int
9531 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9532 {
9533         int err;
9534         int cmd;
9535         vnode_t *vp;
9536         struct shrlock shr;
9537         struct shr_locowner shr_loco;
9538         int fflags = 0;
9539 
9540         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9541         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9542 
9543         if (sp->rs_closed)
9544                 return (NFS4ERR_OLD_STATEID);
9545 
9546         vp = sp->rs_finfo->rf_vp;
9547         ASSERT(vp);
9548 
9549         shr.s_access = shr.s_deny = 0;
9550 
9551         if (access & OPEN4_SHARE_ACCESS_READ) {
9552                 fflags |= FREAD;
9553                 shr.s_access |= F_RDACC;
9554         }
9555         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9556                 fflags |= FWRITE;
9557                 shr.s_access |= F_WRACC;
9558         }
9559         ASSERT(shr.s_access);
9560 
9561         if (deny & OPEN4_SHARE_DENY_READ)
9562                 shr.s_deny |= F_RDDNY;
9563         if (deny & OPEN4_SHARE_DENY_WRITE)
9564                 shr.s_deny |= F_WRDNY;
9565 
9566         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9567         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9568         shr_loco.sl_pid = shr.s_pid;
9569         shr_loco.sl_id = shr.s_sysid;
9570         shr.s_owner = (caddr_t)&shr_loco;
9571         shr.s_own_len = sizeof (shr_loco);
9572 
9573         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9574 
9575         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9576         if (err != 0) {
9577                 if (err == EAGAIN)
9578                         err = NFS4ERR_SHARE_DENIED;
9579                 else
9580                         err = puterrno4(err);
9581                 return (err);
9582         }
9583 
9584         sp->rs_share_access |= access;
9585         sp->rs_share_deny |= deny;
9586 
9587         return (0);
9588 }
9589 
9590 int
9591 rfs4_unshare(rfs4_state_t *sp)
9592 {
9593         int err;
9594         struct shrlock shr;
9595         struct shr_locowner shr_loco;
9596 
9597         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9598 
9599         if (sp->rs_closed || sp->rs_share_access == 0)
9600                 return (0);
9601 
9602         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9603         ASSERT(sp->rs_finfo->rf_vp);
9604 
9605         shr.s_access = shr.s_deny = 0;
9606         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9607         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9608         shr_loco.sl_pid = shr.s_pid;
9609         shr_loco.sl_id = shr.s_sysid;
9610         shr.s_owner = (caddr_t)&shr_loco;
9611         shr.s_own_len = sizeof (shr_loco);
9612 
9613         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9614             NULL);
9615         if (err != 0) {
9616                 err = puterrno4(err);
9617                 return (err);
9618         }
9619 
9620         sp->rs_share_access = 0;
9621         sp->rs_share_deny = 0;
9622 
9623         return (0);
9624 
9625 }
9626 
9627 static int
9628 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9629 {
9630         struct clist    *wcl;
9631         count4          count = rok->data_len;
9632         int             wlist_len;
9633 
9634         wcl = args->wlist;
9635         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9636                 return (FALSE);
9637         }
9638         wcl = args->wlist;
9639         rok->wlist_len = wlist_len;
9640         rok->wlist = wcl;
9641         return (TRUE);
9642 }
9643 
9644 /* tunable: non-zero disables server referrals; see vn_is_nfs_reparse() */
9645 int rfs4_no_referrals = 0;
9646 
9647 /*
9648  * Find an NFS record in reparse point data.
9649  * Returns 0 for success and <0 or an errno value on failure.
9650  */
9651 int
9652 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9653 {
9654         int err;
9655         char *stype, *val;
9656         nvlist_t *nvl;
9657         nvpair_t *curr;
9658 
9659         if ((nvl = reparse_init()) == NULL)
9660                 return (-1);
9661 
9662         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9663                 reparse_free(nvl);
9664                 return (err);
9665         }
9666 
9667         curr = NULL;
9668         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9669                 if ((stype = nvpair_name(curr)) == NULL) {
9670                         reparse_free(nvl);
9671                         return (-2);
9672                 }
9673                 if (strncasecmp(stype, "NFS", 3) == 0)
9674                         break;
9675         }
9676 
9677         if ((curr == NULL) ||
9678             (nvpair_value_string(curr, &val))) {
9679                 reparse_free(nvl);
9680                 return (-3);
9681         }
9682         *nvlp = nvl;
9683         *svcp = stype;
9684         *datap = val;
9685         return (0);
9686 }
9687 
9688 int
9689 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9690 {
9691         nvlist_t *nvl;
9692         char *s, *d;
9693 
9694         if (rfs4_no_referrals != 0)
9695                 return (B_FALSE);
9696 
9697         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9698                 return (B_FALSE);
9699 
9700         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9701                 return (B_FALSE);
9702 
9703         reparse_free(nvl);
9704 
9705         return (B_TRUE);
9706 }
9707 
9708 /*
9709  * There is a user-level copy of this routine in ref_subr.c.
9710  * Changes should be kept in sync.
9711  */
9712 static int
9713 nfs4_create_components(char *path, component4 *comp4)
9714 {
9715         int slen, plen, ncomp;
9716         char *ori_path, *nxtc, buf[MAXNAMELEN];
9717 
9718         if (path == NULL)
9719                 return (0);
9720 
9721         plen = strlen(path) + 1;        /* include the terminator */
9722         ori_path = path;
9723         ncomp = 0;
9724 
9725         /* count number of components in the path */
9726         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9727                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9728                         if ((slen = nxtc - path) == 0) {
9729                                 path = nxtc + 1;
9730                                 continue;
9731                         }
9732 
9733                         if (comp4 != NULL) {
9734                                 bcopy(path, buf, slen);
9735                                 buf[slen] = '\0';
9736                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9737                         }
9738 
9739                         ncomp++;        /* 1 valid component */
9740                         path = nxtc + 1;
9741                 }
9742                 if (*nxtc == '\0' || *nxtc == '\n')
9743                         break;
9744         }
9745 
9746         return (ncomp);
9747 }
9748 
9749 /*
9750  * There is a user-level copy of this routine in ref_subr.c.
9751  * Changes should be kept in sync.
9752  */
9753 static int
9754 make_pathname4(char *path, pathname4 *pathname)
9755 {
9756         int ncomp;
9757         component4 *comp4;
9758 
9759         if (pathname == NULL)
9760                 return (0);
9761 
9762         if (path == NULL) {
9763                 pathname->pathname4_val = NULL;
9764                 pathname->pathname4_len = 0;
9765                 return (0);
9766         }
9767 
9768         /* count number of components to alloc buffer */
9769         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9770                 pathname->pathname4_val = NULL;
9771                 pathname->pathname4_len = 0;
9772                 return (0);
9773         }
9774         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9775 
9776         /* copy components into allocated buffer */
9777         ncomp = nfs4_create_components(path, comp4);
9778 
9779         pathname->pathname4_val = comp4;
9780         pathname->pathname4_len = ncomp;
9781 
9782         return (ncomp);
9783 }
9784 
9785 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9786 
9787 fs_locations4 *
9788 fetch_referral(vnode_t *vp, cred_t *cr)
9789 {
9790         nvlist_t *nvl;
9791         char *stype, *sdata;
9792         fs_locations4 *result;
9793         char buf[1024];
9794         size_t bufsize;
9795         XDR xdr;
9796         int err;
9797 
9798         /*
9799          * Check attrs to ensure it's a reparse point
9800          */
9801         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9802                 return (NULL);
9803 
9804         /*
9805          * Look for an NFS record and get the type and data
9806          */
9807         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9808                 return (NULL);
9809 
9810         /*
9811          * With the type and data, upcall to get the referral
9812          */
9813         bufsize = sizeof (buf);
9814         bzero(buf, sizeof (buf));
9815         err = reparse_kderef((const char *)stype, (const char *)sdata,
9816             buf, &bufsize);
9817         reparse_free(nvl);
9818 
9819         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9820             char *, stype, char *, sdata, char *, buf, int, err);
9821         if (err) {
9822                 cmn_err(CE_NOTE,
9823                     "reparsed daemon not running: unable to get referral (%d)",
9824                     err);
9825                 return (NULL);
9826         }
9827 
9828         /*
9829          * We get an XDR'ed record back from the kderef call
9830          */
9831         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9832         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9833         err = xdr_fs_locations4(&xdr, result);
9834         XDR_DESTROY(&xdr);
9835         if (err != TRUE) {
9836                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9837                     int, err);
9838                 return (NULL);
9839         }
9840 
9841         /*
9842          * Look at path to recover fs_root, ignoring the leading '/'
9843          */
9844         (void) make_pathname4(vp->v_path, &result->fs_root);
9845 
9846         return (result);
9847 }
9848 
9849 char *
9850 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9851 {
9852         fs_locations4 *fsl;
9853         fs_location4 *fs;
9854         char *server, *path, *symbuf;
9855         static char *prefix = "/net/";
9856         int i, size, npaths;
9857         uint_t len;
9858 
9859         /* Get the referral */
9860         if ((fsl = fetch_referral(vp, cr)) == NULL)
9861                 return (NULL);
9862 
9863         /* Deal with only the first location and first server */
9864         fs = &fsl->locations_val[0];
9865         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9866         if (server == NULL) {
9867                 rfs4_free_fs_locations4(fsl);
9868                 kmem_free(fsl, sizeof (fs_locations4));
9869                 return (NULL);
9870         }
9871 
9872         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9873         size = strlen(prefix) + len;
9874         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9875                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9876 
9877         /* Allocate the symlink buffer and fill it */
9878         symbuf = kmem_zalloc(size, KM_SLEEP);
9879         (void) strcat(symbuf, prefix);
9880         (void) strcat(symbuf, server);
9881         kmem_free(server, len);
9882 
9883         npaths = 0;
9884         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9885                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9886                 if (path == NULL)
9887                         continue;
9888                 (void) strcat(symbuf, "/");
9889                 (void) strcat(symbuf, path);
9890                 npaths++;
9891                 kmem_free(path, len);
9892         }
9893 
9894         rfs4_free_fs_locations4(fsl);
9895         kmem_free(fsl, sizeof (fs_locations4));
9896 
9897         if (strsz != NULL)
9898                 *strsz = size;
9899         return (symbuf);
9900 }
9901 
9902 /*
9903  * Check to see if we have a downrev Solaris client, so that we
9904  * can send it a symlink instead of a referral.
9905  */
9906 int
9907 client_is_downrev(struct svc_req *req)
9908 {
9909         struct sockaddr *ca;
9910         rfs4_clntip_t *ci;
9911         bool_t create = FALSE;
9912         int is_downrev;
9913 
9914         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9915         ASSERT(ca);
9916         ci = rfs4_find_clntip(ca, &create);
9917         if (ci == NULL)
9918                 return (0);
9919         is_downrev = ci->ri_no_referrals;
9920         rfs4_dbe_rele(ci->ri_dbe);
9921         return (is_downrev);
9922 }
9923 
9924 /*
9925  * Do the main work of handling HA-NFSv4 Resource Group failover on
9926  * Sun Cluster.
9927  * We need to detect whether any RG admin paths have been added or removed,
9928  * and adjust resources accordingly.
9929  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9930  * order to scale, the list and array of paths need to be held in more
9931  * suitable data structures.
9932  */
9933 static void
9934 hanfsv4_failover(nfs4_srv_t *nsrv4)
9935 {
9936         int i, start_grace, numadded_paths = 0;
9937         char **added_paths = NULL;
9938         rfs4_dss_path_t *dss_path;
9939 
9940         /*
9941          * Note: currently, dss_pathlist cannot be NULL, since
9942          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9943          * make the latter dynamically specified too, the following will
9944          * need to be adjusted.
9945          */
9946 
9947         /*
9948          * First, look for removed paths: RGs that have been failed-over
9949          * away from this node.
9950          * Walk the "currently-serving" dss_pathlist and, for each
9951          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9952          * from nfsd. If not, that RG path has been removed.
9953          *
9954          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9955          * any duplicates.
9956          */
9957         dss_path = nsrv4->dss_pathlist;
9958         do {
9959                 int found = 0;
9960                 char *path = dss_path->path;
9961 
9962                 /* used only for non-HA so may not be removed */
9963                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9964                         dss_path = dss_path->next;
9965                         continue;
9966                 }
9967 
9968                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9969                         int cmpret;
9970                         char *newpath = rfs4_dss_newpaths[i];
9971 
9972                         /*
9973                          * Since nfsd has sorted rfs4_dss_newpaths for us,
9974                          * once the return from strcmp is negative we know
9975                          * we've passed the point where "path" should be,
9976                          * and can stop searching: "path" has been removed.
9977                          */
9978                         cmpret = strcmp(path, newpath);
9979                         if (cmpret < 0)
9980                                 break;
9981                         if (cmpret == 0) {
9982                                 found = 1;
9983                                 break;
9984                         }
9985                 }
9986 
9987                 if (found == 0) {
9988                         unsigned index = dss_path->index;
9989                         rfs4_servinst_t *sip = dss_path->sip;
9990                         rfs4_dss_path_t *path_next = dss_path->next;
9991 
9992                         /*
9993                          * This path has been removed.
9994                          * We must clear out the servinst reference to
9995                          * it, since it's now owned by another
9996                          * node: we should not attempt to touch it.
9997                          */
9998                         ASSERT(dss_path == sip->dss_paths[index]);
9999                         sip->dss_paths[index] = NULL;
10000 
10001                         /* remove from "currently-serving" list, and destroy */
10002                         remque(dss_path);
10003                         /* allow for NUL */
10004                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10005                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10006 
10007                         dss_path = path_next;
10008                 } else {
10009                         /* path was found; not removed */
10010                         dss_path = dss_path->next;
10011                 }
10012         } while (dss_path != nsrv4->dss_pathlist);
10013 
10014         /*
10015          * Now, look for added paths: RGs that have been failed-over
10016          * to this node.
10017          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10018          * for each path, check if it is on the "currently-serving"
10019          * dss_pathlist. If not, that RG path has been added.
10020          *
10021          * Note: we don't do duplicate detection here; nfsd does that for us.
10022          *
10023          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10024          * an upper bound for the size needed for added_paths[numadded_paths].
10025          */
10026 
10027         /* probably more space than we need, but guaranteed to be enough */
10028         if (rfs4_dss_numnewpaths > 0) {
10029                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10030                 added_paths = kmem_zalloc(sz, KM_SLEEP);
10031         }
10032 
10033         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10034         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10035                 int found = 0;
10036                 char *newpath = rfs4_dss_newpaths[i];
10037 
10038                 dss_path = nsrv4->dss_pathlist;
10039                 do {
10040                         char *path = dss_path->path;
10041 
10042                         /* used only for non-HA */
10043                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10044                                 dss_path = dss_path->next;
10045                                 continue;
10046                         }
10047 
10048                         if (strncmp(path, newpath, strlen(path)) == 0) {
10049                                 found = 1;
10050                                 break;
10051                         }
10052 
10053                         dss_path = dss_path->next;
10054                 } while (dss_path != nsrv4->dss_pathlist);
10055 
10056                 if (found == 0) {
10057                         added_paths[numadded_paths] = newpath;
10058                         numadded_paths++;
10059                 }
10060         }
10061 
10062         /* did we find any added paths? */
10063         if (numadded_paths > 0) {
10064 
10065                 /* create a new server instance, and start its grace period */
10066                 start_grace = 1;
10067                 /* CSTYLED */
10068                 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10069 
10070                 /* read in the stable storage state from these paths */
10071                 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10072 
10073                 /*
10074                  * Multiple failovers during a grace period will cause
10075                  * clients of the same resource group to be partitioned
10076                  * into different server instances, with different
10077                  * grace periods.  Since clients of the same resource
10078                  * group must be subject to the same grace period,
10079                  * we need to reset all currently active grace periods.
10080                  */
10081                 rfs4_grace_reset_all(nsrv4);
10082         }
10083 
10084         if (rfs4_dss_numnewpaths > 0)
10085                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10086 }