1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30 
  31 /*
  32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33  * Copyright 2019 Nexenta Systems, Inc.
  34  * Copyright 2019 Nexenta by DDN, Inc.
  35  */
  36 
  37 #include <sys/param.h>
  38 #include <sys/types.h>
  39 #include <sys/systm.h>
  40 #include <sys/cred.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 #include <sys/vnode.h>
  45 #include <sys/uio.h>
  46 #include <sys/errno.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/statvfs.h>
  49 #include <sys/kmem.h>
  50 #include <sys/dirent.h>
  51 #include <sys/cmn_err.h>
  52 #include <sys/debug.h>
  53 #include <sys/systeminfo.h>
  54 #include <sys/flock.h>
  55 #include <sys/pathname.h>
  56 #include <sys/nbmlock.h>
  57 #include <sys/share.h>
  58 #include <sys/atomic.h>
  59 #include <sys/policy.h>
  60 #include <sys/fem.h>
  61 #include <sys/sdt.h>
  62 #include <sys/ddi.h>
  63 #include <sys/zone.h>
  64 
  65 #include <fs/fs_reparse.h>
  66 
  67 #include <rpc/types.h>
  68 #include <rpc/auth.h>
  69 #include <rpc/rpcsec_gss.h>
  70 #include <rpc/svc.h>
  71 
  72 #include <nfs/nfs.h>
  73 #include <nfs/nfssys.h>
  74 #include <nfs/export.h>
  75 #include <nfs/nfs_cmd.h>
  76 #include <nfs/lm.h>
  77 #include <nfs/nfs4.h>
  78 #include <nfs/nfs4_drc.h>
  79 
  80 #include <sys/strsubr.h>
  81 #include <sys/strsun.h>
  82 
  83 #include <inet/common.h>
  84 #include <inet/ip.h>
  85 #include <inet/ip6.h>
  86 
  87 #include <sys/tsol/label.h>
  88 #include <sys/tsol/tndb.h>
  89 
  90 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  92 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  93 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  94 extern struct svc_ops rdma_svc_ops;
  95 extern int nfs_loaned_buffers;
  96 /* End of Tunables */
  97 
  98 static int rdma_setup_read_data4(READ4args *, READ4res *);
  99 
 100 /*
 101  * Used to bump the stateid4.seqid value and show changes in the stateid
 102  */
 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
 104 
 105 /*
 106  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  *      maxcount that isn't large enough to hold the smallest possible
 109  *      XDR encoded dirent.
 110  *
 111  *          sizeof cookie (8 bytes) +
 112  *          sizeof name_len (4 bytes) +
 113  *          sizeof smallest (padded) name (4 bytes) +
 114  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  *          sizeof attrlist4_len (4 bytes) +
 116  *          sizeof next boolean (4 bytes)
 117  *
 118  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  * the smallest possible entry4 (assumes no attrs requested).
 120  *      sizeof nfsstat4 (4 bytes) +
 121  *      sizeof verifier4 (8 bytes) +
 122  *      sizeof entry4list bool (4 bytes) +
 123  *      sizeof entry4   (36 bytes) +
 124  *      sizeof eof bool  (4 bytes)
 125  *
 126  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  *      required for a given name length.  MAXNAMELEN is the maximum
 130  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  *      to hold ., .., and the largest possible solaris dirent64.
 134  */
 135 #define RFS4_MINLEN_ENTRY4 36
 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137 #define RFS4_MINLEN_RDDIR_BUF \
 138         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139 
 140 /*
 141  * It would be better to pad to 4 bytes since that's what XDR would do,
 142  * but the dirents UFS gives us are already padded to 8, so just take
 143  * what we're given.  Dircount is only a hint anyway.  Currently the
 144  * solaris kernel is ASCII only, so there's no point in calling the
 145  * UTF8 functions.
 146  *
 * dirent64: name padded to provide 8 byte struct alignment
 148  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  *
 150  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  *
 152  */
 153 #define DIRENT64_TO_DIRCOUNT(dp) \
 154         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155 
 156 
 157 static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 158 
 159 u_longlong_t    nfs4_srv_caller_id;
 160 uint_t          nfs4_srv_vkey = 0;
 161 
 162 void    rfs4_init_compound_state(struct compound_state *);
 163 
 164 static void     nullfree(caddr_t);
 165 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                     struct compound_state *);
 167 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                     struct compound_state *);
 169 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170                     struct compound_state *);
 171 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172                     struct compound_state *);
 173 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 174                     struct compound_state *);
 175 static void     rfs4_op_create_free(nfs_resop4 *resop);
 176 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 177                     struct svc_req *, struct compound_state *);
 178 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 179                     struct svc_req *, struct compound_state *);
 180 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181                     struct compound_state *);
 182 static void     rfs4_op_getattr_free(nfs_resop4 *);
 183 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 184                     struct compound_state *);
 185 static void     rfs4_op_getfh_free(nfs_resop4 *);
 186 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187                     struct compound_state *);
 188 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189                     struct compound_state *);
 190 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 191                     struct compound_state *);
 192 static void     lock_denied_free(nfs_resop4 *);
 193 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194                     struct compound_state *);
 195 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196                     struct compound_state *);
 197 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198                     struct compound_state *);
 199 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 200                     struct compound_state *);
 201 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 202                     struct svc_req *req, struct compound_state *cs);
 203 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204                     struct compound_state *);
 205 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 206                     struct compound_state *);
 207 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 208                     struct svc_req *, struct compound_state *);
 209 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 210                     struct svc_req *, struct compound_state *);
 211 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212                     struct compound_state *);
 213 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214                     struct compound_state *);
 215 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216                     struct compound_state *);
 217 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 218                     struct compound_state *);
 219 static void     rfs4_op_read_free(nfs_resop4 *);
 220 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 221 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 222                     struct compound_state *);
 223 static void     rfs4_op_readlink_free(nfs_resop4 *);
 224 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 225                     struct svc_req *, struct compound_state *);
 226 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227                     struct compound_state *);
 228 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229                     struct compound_state *);
 230 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                     struct compound_state *);
 232 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                     struct compound_state *);
 234 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                     struct compound_state *);
 236 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237                     struct compound_state *);
 238 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239                     struct compound_state *);
 240 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 241                     struct compound_state *);
 242 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 243                     struct svc_req *, struct compound_state *);
 244 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 245                     struct svc_req *req, struct compound_state *);
 246 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 247                     struct compound_state *);
 248 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 249 
 250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 251                     struct svc_req *);
 252 nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 253 void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 254 
 255 
/*
 * Translation table for attrs.
 *
 * Set up and torn down with nfs4_ntov_table_init() and
 * nfs4_ntov_table_free(), and handed to do_rfs4_set_attrs() (see the
 * prototypes that follow this definition).
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only; confirm against the code that fills the table.
 */
struct nfs4_ntov_table {
        union nfs4_attr_u *na;          /* presumably per-attr value storage */
        uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably slot -> attr index map */
        int attrcnt;                    /* presumably number of slots in use */
        bool_t vfsstat;                 /* presumably: vfs stats were needed */
};
 265 
 266 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 267 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 268                     struct nfs4_svgetit_arg *sargp);
 269 
 270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 271                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 272                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 273 
 274 static void     hanfsv4_failover(nfs4_srv_t *);
 275 
 276 fem_t           *deleg_rdops;
 277 fem_t           *deleg_wrops;
 278 
 279 /*
 280  * NFS4 op dispatch table
 281  */
 282 
/*
 * One entry of the NFSv4 operation dispatch table (rfsv4disptab).
 *
 * The function pointers are declared without a parameter list.  The
 * handlers stored in dis_proc are prototyped in this file as taking
 * (nfs_argop4 *, nfs_resop4 *, struct svc_req *, struct compound_state *);
 * the result-free routines take an nfs_resop4 *, except nullfree(), which
 * is prototyped as taking a caddr_t.
 */
struct rfsv4disp {
        void    (*dis_proc)();          /* proc to call */
        void    (*dis_resfree)();       /* frees space allocated by proc */
        int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
};
 288 
/*
 * NFSv4 operation dispatch table.
 *
 * Each slot is annotated with the operation number it corresponds to, so
 * the position of an entry in the initializer is significant: entries must
 * not be reordered or removed.  Slots with no real operation (0, 1, 2) are
 * routed to rfs4_op_illegal.  Operations that allocate nothing in their
 * result use nullfree as the result-free routine.
 *
 * rfs4_op_readdir and rfs4_free_reply are not prototyped in this part of
 * the file; they are declared elsewhere.
 */
static struct rfsv4disp rfsv4disptab[] = {
        /*
         * NFS VERSION 4
         */

        /* RFS_NULL = 0 */
        {rfs4_op_illegal, nullfree, 0},

        /* UNUSED = 1 */
        {rfs4_op_illegal, nullfree, 0},

        /* UNUSED = 2 */
        {rfs4_op_illegal, nullfree, 0},

        /* OP_ACCESS = 3 */
        {rfs4_op_access, nullfree, RPC_IDEMPOTENT},

        /* OP_CLOSE = 4 */
        {rfs4_op_close, nullfree, 0},

        /* OP_COMMIT = 5 */
        {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

        /* OP_CREATE = 6 */
        {rfs4_op_create, nullfree, 0},

        /* OP_DELEGPURGE = 7 */
        {rfs4_op_delegpurge, nullfree, 0},

        /* OP_DELEGRETURN = 8 */
        {rfs4_op_delegreturn, nullfree, 0},

        /* OP_GETATTR = 9 */
        {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

        /* OP_GETFH = 10 */
        {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

        /* OP_LINK = 11 */
        {rfs4_op_link, nullfree, 0},

        /* OP_LOCK = 12 */
        {rfs4_op_lock, lock_denied_free, 0},

        /* OP_LOCKT = 13 */
        {rfs4_op_lockt, lock_denied_free, 0},

        /* OP_LOCKU = 14 */
        {rfs4_op_locku, nullfree, 0},

        /* OP_LOOKUP = 15 */
        {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

        /* OP_LOOKUPP = 16 */
        {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

        /* OP_NVERIFY = 17 */
        {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

        /* OP_OPEN = 18 */
        {rfs4_op_open, rfs4_free_reply, 0},

        /* OP_OPENATTR = 19 */
        {rfs4_op_openattr, nullfree, 0},

        /* OP_OPEN_CONFIRM = 20 */
        {rfs4_op_open_confirm, nullfree, 0},

        /* OP_OPEN_DOWNGRADE = 21 */
        {rfs4_op_open_downgrade, nullfree, 0},

        /* OP_PUTFH = 22 */
        {rfs4_op_putfh, nullfree, RPC_ALL},

        /* OP_PUTPUBFH = 23 */
        {rfs4_op_putpubfh, nullfree, RPC_ALL},

        /* OP_PUTROOTFH = 24 */
        {rfs4_op_putrootfh, nullfree, RPC_ALL},

        /* OP_READ = 25 */
        {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

        /* OP_READDIR = 26 */
        {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

        /* OP_READLINK = 27 */
        {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

        /* OP_REMOVE = 28 */
        {rfs4_op_remove, nullfree, 0},

        /* OP_RENAME = 29 */
        {rfs4_op_rename, nullfree, 0},

        /* OP_RENEW = 30 */
        {rfs4_op_renew, nullfree, 0},

        /* OP_RESTOREFH = 31 */
        {rfs4_op_restorefh, nullfree, RPC_ALL},

        /* OP_SAVEFH = 32 */
        {rfs4_op_savefh, nullfree, RPC_ALL},

        /* OP_SECINFO = 33 */
        {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

        /* OP_SETATTR = 34 */
        {rfs4_op_setattr, nullfree, 0},

        /* OP_SETCLIENTID = 35 */
        {rfs4_op_setclientid, nullfree, 0},

        /* OP_SETCLIENTID_CONFIRM = 36 */
        {rfs4_op_setclientid_confirm, nullfree, 0},

        /* OP_VERIFY = 37 */
        {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

        /* OP_WRITE = 38 */
        {rfs4_op_write, nullfree, 0},

        /* OP_RELEASE_LOCKOWNER = 39 */
        {rfs4_op_release_lockowner, nullfree, 0},
};
 414 
 415 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 416 
 417 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 418 
 419 #ifdef DEBUG
 420 
 421 int             rfs4_fillone_debug = 0;
 422 int             rfs4_no_stub_access = 1;
 423 int             rfs4_rddir_debug = 0;
 424 
 425 static char    *rfs4_op_string[] = {
 426         "rfs4_op_null",
 427         "rfs4_op_1 unused",
 428         "rfs4_op_2 unused",
 429         "rfs4_op_access",
 430         "rfs4_op_close",
 431         "rfs4_op_commit",
 432         "rfs4_op_create",
 433         "rfs4_op_delegpurge",
 434         "rfs4_op_delegreturn",
 435         "rfs4_op_getattr",
 436         "rfs4_op_getfh",
 437         "rfs4_op_link",
 438         "rfs4_op_lock",
 439         "rfs4_op_lockt",
 440         "rfs4_op_locku",
 441         "rfs4_op_lookup",
 442         "rfs4_op_lookupp",
 443         "rfs4_op_nverify",
 444         "rfs4_op_open",
 445         "rfs4_op_openattr",
 446         "rfs4_op_open_confirm",
 447         "rfs4_op_open_downgrade",
 448         "rfs4_op_putfh",
 449         "rfs4_op_putpubfh",
 450         "rfs4_op_putrootfh",
 451         "rfs4_op_read",
 452         "rfs4_op_readdir",
 453         "rfs4_op_readlink",
 454         "rfs4_op_remove",
 455         "rfs4_op_rename",
 456         "rfs4_op_renew",
 457         "rfs4_op_restorefh",
 458         "rfs4_op_savefh",
 459         "rfs4_op_secinfo",
 460         "rfs4_op_setattr",
 461         "rfs4_op_setclientid",
 462         "rfs4_op_setclient_confirm",
 463         "rfs4_op_verify",
 464         "rfs4_op_write",
 465         "rfs4_op_release_lockowner",
 466         "rfs4_op_illegal"
 467 };
 468 #endif
 469 
 470 void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 471 
 472 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 473 
 474 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 475 
 476 #ifdef  nextdp
 477 #undef nextdp
 478 #endif
 479 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 480 
/*
 * FEM operation template for read delegations.  rfs4_srvrinit() passes it
 * to fem_create() to build deleg_rdops.  Each pair names a vnode operation
 * and the deleg_rd_* routine installed for it; the list is terminated by a
 * NULL, NULL pair.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
        VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
        NULL,                   NULL
};
/*
 * FEM operation template for write delegations.  rfs4_srvrinit() passes it
 * to fem_create() to build deleg_wrops.  It covers the same operations as
 * the read-delegation template and additionally VOPNAME_READ.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
        VOPNAME_READ,           { .femop_read = deleg_wr_read },
        VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
        NULL,                   NULL
};
 502 
 503 nfs4_srv_t *
 504 nfs4_get_srv(void)
 505 {
 506         nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
 507         nfs4_srv_t *srv = ng->nfs4_srv;
 508         ASSERT(srv != NULL);
 509         return (srv);
 510 }
 511 
 512 void
 513 rfs4_srv_zone_init(nfs_globals_t *ng)
 514 {
 515         nfs4_srv_t *nsrv4;
 516         timespec32_t verf;
 517 
 518         nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 519 
 520         /*
 521          * The following algorithm attempts to find a unique verifier
 522          * to be used as the write verifier returned from the server
 523          * to the client.  It is important that this verifier change
 524          * whenever the server reboots.  Of secondary importance, it
 525          * is important for the verifier to be unique between two
 526          * different servers.
 527          *
 528          * Thus, an attempt is made to use the system hostid and the
 529          * current time in seconds when the nfssrv kernel module is
 530          * loaded.  It is assumed that an NFS server will not be able
 531          * to boot and then to reboot in less than a second.  If the
 532          * hostid has not been set, then the current high resolution
 533          * time is used.  This will ensure different verifiers each
 534          * time the server reboots and minimize the chances that two
 535          * different servers will have the same verifier.
 536          * XXX - this is broken on LP64 kernels.
 537          */
 538         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 539         if (verf.tv_sec != 0) {
 540                 verf.tv_nsec = gethrestime_sec();
 541         } else {
 542                 timespec_t tverf;
 543 
 544                 gethrestime(&tverf);
 545                 verf.tv_sec = (time_t)tverf.tv_sec;
 546                 verf.tv_nsec = tverf.tv_nsec;
 547         }
 548         nsrv4->write4verf = *(uint64_t *)&verf;
 549 
 550         /* Used to manage create/destroy of server state */
 551         nsrv4->nfs4_server_state = NULL;
 552         nsrv4->nfs4_cur_servinst = NULL;
 553         nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 554         mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 555         mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 556         mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 557         rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 558 
 559         ng->nfs4_srv = nsrv4;
 560 }
 561 
 562 void
 563 rfs4_srv_zone_fini(nfs_globals_t *ng)
 564 {
 565         nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 566 
 567         ng->nfs4_srv = NULL;
 568 
 569         mutex_destroy(&nsrv4->deleg_lock);
 570         mutex_destroy(&nsrv4->state_lock);
 571         mutex_destroy(&nsrv4->servinst_lock);
 572         rw_destroy(&nsrv4->deleg_policy_lock);
 573 
 574         kmem_free(nsrv4, sizeof (*nsrv4));
 575 }
 576 
 577 void
 578 rfs4_srvrinit(void)
 579 {
 580         extern void rfs4_attr_init();
 581 
 582         rfs4_attr_init();
 583 
 584         if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 585                 rfs4_disable_delegation();
 586         } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 587             &deleg_wrops) != 0) {
 588                 rfs4_disable_delegation();
 589                 fem_free(deleg_rdops);
 590         }
 591 
 592         nfs4_srv_caller_id = fs_new_caller_id();
 593         lockt_sysid = lm_alloc_sysidt();
 594         vsd_create(&nfs4_srv_vkey, NULL);
 595         rfs4_state_g_init();
 596 }
 597 
 598 void
 599 rfs4_srvrfini(void)
 600 {
 601         if (lockt_sysid != LM_NOSYSID) {
 602                 lm_free_sysidt(lockt_sysid);
 603                 lockt_sysid = LM_NOSYSID;
 604         }
 605 
 606         rfs4_state_g_fini();
 607 
 608         fem_free(deleg_rdops);
 609         fem_free(deleg_wrops);
 610 }
 611 
 612 void
 613 rfs4_do_server_start(int server_upordown,
 614     int srv_delegation, int cluster_booted)
 615 {
 616         nfs4_srv_t *nsrv4 = nfs4_get_srv();
 617 
 618         /* Is this a warm start? */
 619         if (server_upordown == NFS_SERVER_QUIESCED) {
 620                 cmn_err(CE_NOTE, "nfs4_srv: "
 621                     "server was previously quiesced; "
 622                     "existing NFSv4 state will be re-used");
 623 
 624                 /*
 625                  * HA-NFSv4: this is also the signal
 626                  * that a Resource Group failover has
 627                  * occurred.
 628                  */
 629                 if (cluster_booted)
 630                         hanfsv4_failover(nsrv4);
 631         } else {
 632                 /* Cold start */
 633                 nsrv4->rfs4_start_time = 0;
 634                 rfs4_state_zone_init(nsrv4);
 635                 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 636                     nfs4_drc_hash);
 637 
 638                 /*
 639                  * The nfsd service was started with the -s option
 640                  * we need to pull in any state from the paths indicated.
 641                  */
 642                 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 643                         /* read in the stable storage state from these paths */
 644                         rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 645                             rfs4_dss_newpaths);
 646                 }
 647         }
 648 
 649         /* Check if delegation is to be enabled */
 650         if (srv_delegation != FALSE)
 651                 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 652 }
 653 
 654 void
 655 rfs4_init_compound_state(struct compound_state *cs)
 656 {
 657         bzero(cs, sizeof (*cs));
 658         cs->cont = TRUE;
 659         cs->access = CS_ACCESS_DENIED;
 660         cs->deleg = FALSE;
 661         cs->mandlock = FALSE;
 662         cs->fh.nfs_fh4_val = cs->fhbuf;
 663 }
 664 
 665 void
 666 rfs4_grace_start(rfs4_servinst_t *sip)
 667 {
 668         rw_enter(&sip->rwlock, RW_WRITER);
 669         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 670         sip->grace_period = rfs4_grace_period;
 671         rw_exit(&sip->rwlock);
 672 }
 673 
 674 /*
 675  * returns true if the instance's grace period has never been started
 676  */
 677 int
 678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 679 {
 680         time_t start_time;
 681 
 682         rw_enter(&sip->rwlock, RW_READER);
 683         start_time = sip->start_time;
 684         rw_exit(&sip->rwlock);
 685 
 686         return (start_time == 0);
 687 }
 688 
 689 /*
 690  * Indicates if server instance is within the
 691  * grace period.
 692  */
 693 int
 694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 695 {
 696         time_t grace_expiry;
 697 
 698         rw_enter(&sip->rwlock, RW_READER);
 699         grace_expiry = sip->start_time + sip->grace_period;
 700         rw_exit(&sip->rwlock);
 701 
 702         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 703 }
 704 
 705 int
 706 rfs4_clnt_in_grace(rfs4_client_t *cp)
 707 {
 708         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 709 
 710         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 711 }
 712 
 713 /*
 714  * reset all currently active grace periods
 715  */
 716 void
 717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 718 {
 719         rfs4_servinst_t *sip;
 720 
 721         mutex_enter(&nsrv4->servinst_lock);
 722         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 723                 if (rfs4_servinst_in_grace(sip))
 724                         rfs4_grace_start(sip);
 725         mutex_exit(&nsrv4->servinst_lock);
 726 }
 727 
 728 /*
 729  * start any new instances' grace periods
 730  */
 731 void
 732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 733 {
 734         rfs4_servinst_t *sip;
 735 
 736         mutex_enter(&nsrv4->servinst_lock);
 737         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 738                 if (rfs4_servinst_grace_new(sip))
 739                         rfs4_grace_start(sip);
 740         mutex_exit(&nsrv4->servinst_lock);
 741 }
 742 
 743 static rfs4_dss_path_t *
 744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 745     char *path, unsigned index)
 746 {
 747         size_t len;
 748         rfs4_dss_path_t *dss_path;
 749 
 750         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 751 
 752         /*
 753          * Take a copy of the string, since the original may be overwritten.
 754          * Sadly, no strdup() in the kernel.
 755          */
 756         /* allow for NUL */
 757         len = strlen(path) + 1;
 758         dss_path->path = kmem_alloc(len, KM_SLEEP);
 759         (void) strlcpy(dss_path->path, path, len);
 760 
 761         /* associate with servinst */
 762         dss_path->sip = sip;
 763         dss_path->index = index;
 764 
 765         /*
 766          * Add to list of served paths.
 767          * No locking required, as we're only ever called at startup.
 768          */
 769         if (nsrv4->dss_pathlist == NULL) {
 770                 /* this is the first dss_path_t */
 771 
 772                 /* needed for insque/remque */
 773                 dss_path->next = dss_path->prev = dss_path;
 774 
 775                 nsrv4->dss_pathlist = dss_path;
 776         } else {
 777                 insque(dss_path, nsrv4->dss_pathlist);
 778         }
 779 
 780         return (dss_path);
 781 }
 782 
 783 /*
 784  * Create a new server instance, and make it the currently active instance.
 785  * Note that starting the grace period too early will reduce the clients'
 786  * recovery window.
 787  */
 788 void
 789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 790     int dss_npaths, char **dss_paths)
 791 {
 792         unsigned i;
 793         rfs4_servinst_t *sip;
 794         rfs4_oldstate_t *oldstate;
 795 
 796         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 797         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 798 
 799         sip->start_time = (time_t)0;
 800         sip->grace_period = (time_t)0;
 801         sip->next = NULL;
 802         sip->prev = NULL;
 803 
 804         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 805         /*
 806          * This initial dummy entry is required to setup for insque/remque.
 807          * It must be skipped over whenever the list is traversed.
 808          */
 809         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 810         /* insque/remque require initial list entry to be self-terminated */
 811         oldstate->next = oldstate;
 812         oldstate->prev = oldstate;
 813         sip->oldstate = oldstate;
 814 
 815 
 816         sip->dss_npaths = dss_npaths;
 817         sip->dss_paths = kmem_alloc(dss_npaths *
 818             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 819 
 820         for (i = 0; i < dss_npaths; i++) {
 821                 /* CSTYLED */
 822                 sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 823         }
 824 
 825         mutex_enter(&nsrv4->servinst_lock);
 826         if (nsrv4->nfs4_cur_servinst != NULL) {
 827                 /* add to linked list */
 828                 sip->prev = nsrv4->nfs4_cur_servinst;
 829                 nsrv4->nfs4_cur_servinst->next = sip;
 830         }
 831         if (start_grace)
 832                 rfs4_grace_start(sip);
 833         /* make the new instance "current" */
 834         nsrv4->nfs4_cur_servinst = sip;
 835 
 836         mutex_exit(&nsrv4->servinst_lock);
 837 }
 838 
 839 /*
 840  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 841  * all instances directly.
 842  */
 843 void
 844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 845 {
 846         rfs4_servinst_t *sip, *prev, *current;
 847 #ifdef DEBUG
 848         int n = 0;
 849 #endif
 850 
 851         mutex_enter(&nsrv4->servinst_lock);
 852         ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 853         current = nsrv4->nfs4_cur_servinst;
 854         nsrv4->nfs4_cur_servinst = NULL;
 855         for (sip = current; sip != NULL; sip = prev) {
 856                 prev = sip->prev;
 857                 rw_destroy(&sip->rwlock);
 858                 if (sip->oldstate)
 859                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 860                 if (sip->dss_paths)
 861                         kmem_free(sip->dss_paths,
 862                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 863                 kmem_free(sip, sizeof (rfs4_servinst_t));
 864 #ifdef DEBUG
 865                 n++;
 866 #endif
 867         }
 868         mutex_exit(&nsrv4->servinst_lock);
 869 }
 870 
 871 /*
 872  * Assign the current server instance to a client_t.
 873  * Should be called with cp->rc_dbe held.
 874  */
 875 void
 876 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 877     rfs4_servinst_t *sip)
 878 {
 879         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 880 
 881         /*
 882          * The lock ensures that if the current instance is in the process
 883          * of changing, we will see the new one.
 884          */
 885         mutex_enter(&nsrv4->servinst_lock);
 886         cp->rc_server_instance = sip;
 887         mutex_exit(&nsrv4->servinst_lock);
 888 }
 889 
 890 rfs4_servinst_t *
 891 rfs4_servinst(rfs4_client_t *cp)
 892 {
 893         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 894 
 895         return (cp->rc_server_instance);
 896 }
 897 
/*
 * Result-free routine that deliberately does nothing; resop is ignored.
 * NOTE(review): presumably installed for operations whose results own no
 * dynamically allocated memory - confirm against the op dispatch table.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
 903 
 904 /*
 905  * This is a fall-through for invalid or not implemented (yet) ops
 906  */
 907 /* ARGSUSED */
 908 static void
 909 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 910     struct compound_state *cs)
 911 {
 912         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 913 }
 914 
 915 /*
 916  * Check if the security flavor, nfsnum, is in the flavor_list.
 917  */
 918 bool_t
 919 in_flavor_list(int nfsnum, int *flavor_list, int count)
 920 {
 921         int i;
 922 
 923         for (i = 0; i < count; i++) {
 924                 if (nfsnum == flavor_list[i])
 925                         return (TRUE);
 926         }
 927         return (FALSE);
 928 }
 929 
/*
 * Used by rfs4_op_secinfo to get the security information from the
 * export structure associated with the component.
 *
 * cs   - compound state: cs->vp is the directory the lookup starts in,
 *        cs->exi its export, cs->cr the credential used for the lookup.
 * nm   - NUL-terminated component name; ".." is handled specially.
 * resp - on success SECINFO4resok_len/SECINFO4resok_val are set to a
 *        kmem_alloc'ed array of secinfo4 entries.  Each RPCSEC_GSS entry
 *        additionally owns a kmem_alloc'ed copy of the mechanism OID.
 *
 * Returns NFS4_OK on success, otherwise an nfsstat4 error.  Every error
 * return happens before anything is stored in resp, and the vnode obtained
 * from the lookup is released on all paths that follow the lookup.
 *
 * The caller must hold the export table lock (ne->exported_lock); this is
 * asserted before the export's security list is read.
 */
/* ARGSUSED */
static nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
        int error, different_export = 0;
        vnode_t *dvp, *vp;
        struct exportinfo *exi = NULL;
        fid_t fid;
        uint_t count, i;
        secinfo4 *resok_val;
        struct secinfo *secp;
        seconfig_t *si;
        bool_t did_traverse = FALSE;
        int dotdot, walk;
        nfs_export_t *ne = nfs_get_export();

        dvp = cs->vp;
        /* True only for the exact name "..". */
        dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

        /*
         * If dotdotting, then need to check whether it's above the
         * root of a filesystem, or above an export point.
         */
        if (dotdot) {

                /*
                 * If dotdotting at the root of a filesystem, then
                 * need to traverse back to the mounted-on filesystem
                 * and do the dotdot lookup there.
                 */
                if (cs->vp->v_flag & VROOT) {

                        /*
                         * If at the system root, then can
                         * go up no further.
                         */
                        if (VN_CMP(dvp, ZONE_ROOTVP()))
                                return (puterrno4(ENOENT));

                        /*
                         * Traverse back to the mounted-on filesystem
                         */
                        dvp = untraverse(cs->vp);

                        /*
                         * Set the different_export flag so we remember
                         * to pick up a new exportinfo entry for
                         * this new filesystem.
                         */
                        different_export = 1;
                } else {

                        /*
                         * If dotdotting above an export point then set
                         * the different_export to get new export info.
                         */
                        different_export = nfs_exported(cs->exi, cs->vp);
                }
        }

        /*
         * Get the vnode for the component "nm".
         */
        error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
            NULL, NULL, NULL);
        if (error)
                return (puterrno4(error));

        /*
         * If the vnode is in a pseudo filesystem, or if the security flavor
         * used in the request is valid but not an explicitly shared flavor,
         * or the access bit indicates that this is a limited access,
         * check whether this vnode is visible.
         */
        if (!different_export &&
            (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
            cs->access & CS_ACCESS_LIMITED)) {
                /* nfs_visible() may also set different_export. */
                if (! nfs_visible(cs->exi, vp, &different_export)) {
                        VN_RELE(vp);
                        return (puterrno4(ENOENT));
                }
        }

        /*
         * If it's a mountpoint, then traverse it.
         */
        if (vn_ismntpt(vp)) {
                if ((error = traverse(&vp)) != 0) {
                        VN_RELE(vp);
                        return (puterrno4(error));
                }
                /* remember that we had to traverse mountpoint */
                did_traverse = TRUE;
                different_export = 1;
        } else if (vp->v_vfsp != dvp->v_vfsp) {
                /*
                 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
                 * then vp is probably an LOFS object.  We don't need the
                 * realvp, we just need to know that we might have crossed
                 * a server fs boundary and need to call checkexport4.
                 * (LOFS lookup hides server fs mountpoints, and actually calls
                 * traverse)
                 */
                different_export = 1;
        }

        /*
         * Get the export information for it.
         */
        if (different_export) {

                bzero(&fid, sizeof (fid));
                fid.fid_len = MAXFIDSZ;
                error = vop_fid_pseudo(vp, &fid);
                if (error) {
                        VN_RELE(vp);
                        return (puterrno4(error));
                }

                /*
                 * Going up ("..") resolves the export from the vnode via
                 * nfs_vptoexi(); going down looks it up by fsid and fid.
                 */
                if (dotdot)
                        exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
                else
                        exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

                if (exi == NULL) {
                        if (did_traverse == TRUE) {
                                /*
                                 * If this vnode is a mounted-on vnode,
                                 * but the mounted-on file system is not
                                 * exported, send back the secinfo for
                                 * the exported node that the mounted-on
                                 * vnode lives in.
                                 */
                                exi = cs->exi;
                        } else {
                                VN_RELE(vp);
                                return (puterrno4(EACCES));
                        }
                }
        } else {
                exi = cs->exi;
        }
        ASSERT(exi != NULL);


        /*
         * Create the secinfo result based on the security information
         * from the exportinfo structure (exi).
         *
         * Return all flavors for a pseudo node.
         * For a real export node, return the flavor that the client
         * has access with.
         */
        ASSERT(RW_LOCK_HELD(&ne->exported_lock));
        if (PSEUDO(exi)) {
                count = exi->exi_export.ex_seccnt; /* total sec count */
                resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
                secp = exi->exi_export.ex_secinfo;

                /* One result entry per flavor configured on the export. */
                for (i = 0; i < count; i++) {
                        si = &secp[i].s_secinfo;
                        resok_val[i].flavor = si->sc_rpcnum;
                        if (resok_val[i].flavor == RPCSEC_GSS) {
                                rpcsec_gss_info *info;

                                info = &resok_val[i].flavor_info;
                                info->qop = si->sc_qop;
                                info->service = (rpc_gss_svc_t)si->sc_service;

                                /* get oid opaque data */
                                info->oid.sec_oid4_len =
                                    si->sc_gss_mech_type->length;
                                info->oid.sec_oid4_val = kmem_alloc(
                                    si->sc_gss_mech_type->length, KM_SLEEP);
                                bcopy(
                                    si->sc_gss_mech_type->elements,
                                    info->oid.sec_oid4_val,
                                    info->oid.sec_oid4_len);
                        }
                }
                resp->SECINFO4resok_len = count;
                resp->SECINFO4resok_val = resok_val;
        } else {
                int ret_cnt = 0, k = 0;
                int *flavor_list;

                count = exi->exi_export.ex_seccnt; /* total sec count */
                secp = exi->exi_export.ex_secinfo;

                /* Scratch list of accepted NFS flavor numbers; freed below. */
                flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
                /* find out which flavors to return */
                for (i = 0; i < count; i ++) {
                        int access, flavor, perm;

                        flavor = secp[i].s_secinfo.sc_nfsnum;
                        perm = secp[i].s_flags;

                        access = nfsauth4_secinfo_access(exi, cs->req,
                            flavor, perm, cs->basecr);

                        /* Keep flavors neither denied nor "wrong sec". */
                        if (! (access & NFSAUTH_DENIED) &&
                            ! (access & NFSAUTH_WRONGSEC)) {
                                flavor_list[ret_cnt] = flavor;
                                ret_cnt++;
                        }
                }

                /* Create the returning SECINFO value */
                resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);

                /* k indexes resok_val; i walks the export's flavor list. */
                for (i = 0; i < count; i++) {
                        /*
                         * If the flavor is in the flavor list,
                         * fill in resok_val.
                         */
                        si = &secp[i].s_secinfo;
                        if (in_flavor_list(si->sc_nfsnum,
                            flavor_list, ret_cnt)) {
                                resok_val[k].flavor = si->sc_rpcnum;
                                if (resok_val[k].flavor == RPCSEC_GSS) {
                                        rpcsec_gss_info *info;

                                        info = &resok_val[k].flavor_info;
                                        info->qop = si->sc_qop;
                                        info->service = (rpc_gss_svc_t)
                                            si->sc_service;

                                        /* get oid opaque data */
                                        info->oid.sec_oid4_len =
                                            si->sc_gss_mech_type->length;
                                        info->oid.sec_oid4_val = kmem_alloc(
                                            si->sc_gss_mech_type->length,
                                            KM_SLEEP);
                                        bcopy(si->sc_gss_mech_type->elements,
                                            info->oid.sec_oid4_val,
                                            info->oid.sec_oid4_len);
                                }
                                k++;
                        }
                        /* Stop once resok_val is full so k stays in bounds. */
                        if (k >= ret_cnt)
                                break;
                }
                resp->SECINFO4resok_len = ret_cnt;
                resp->SECINFO4resok_val = resok_val;
                kmem_free(flavor_list, count * sizeof (int));
        }

        VN_RELE(vp);
        return (NFS4_OK);
}
1184 
1185 /*
1186  * SECINFO (Operation 33): Obtain required security information on
1187  * the component name in the format of (security-mechanism-oid, qop, service)
1188  * triplets.
1189  */
1190 /* ARGSUSED */
1191 static void
1192 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1193     struct compound_state *cs)
1194 {
1195         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1196         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1197         utf8string *utfnm = &args->name;
1198         uint_t len;
1199         char *nm;
1200         struct sockaddr *ca;
1201         char *name = NULL;
1202         nfsstat4 status = NFS4_OK;
1203 
1204         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1205             SECINFO4args *, args);
1206 
1207         /*
1208          * Current file handle (cfh) should have been set before getting
1209          * into this function. If not, return error.
1210          */
1211         if (cs->vp == NULL) {
1212                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1213                 goto out;
1214         }
1215 
1216         if (cs->vp->v_type != VDIR) {
1217                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1218                 goto out;
1219         }
1220 
1221         /*
1222          * Verify the component name. If failed, error out, but
1223          * do not error out if the component name is a "..".
1224          * SECINFO will return its parents secinfo data for SECINFO "..".
1225          */
1226         status = utf8_dir_verify(utfnm);
1227         if (status != NFS4_OK) {
1228                 if (utfnm->utf8string_len != 2 ||
1229                     utfnm->utf8string_val[0] != '.' ||
1230                     utfnm->utf8string_val[1] != '.') {
1231                         *cs->statusp = resp->status = status;
1232                         goto out;
1233                 }
1234         }
1235 
1236         nm = utf8_to_str(utfnm, &len, NULL);
1237         if (nm == NULL) {
1238                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1239                 goto out;
1240         }
1241 
1242         if (len > MAXNAMELEN) {
1243                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1244                 kmem_free(nm, len);
1245                 goto out;
1246         }
1247 
1248         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1249         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1250             MAXPATHLEN  + 1);
1251 
1252         if (name == NULL) {
1253                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1254                 kmem_free(nm, len);
1255                 goto out;
1256         }
1257 
1258 
1259         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1260 
1261         if (name != nm)
1262                 kmem_free(name, MAXPATHLEN + 1);
1263         kmem_free(nm, len);
1264 
1265 out:
1266         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1267             SECINFO4res *, resp);
1268 }
1269 
1270 /*
1271  * Free SECINFO result.
1272  */
1273 /* ARGSUSED */
1274 static void
1275 rfs4_op_secinfo_free(nfs_resop4 *resop)
1276 {
1277         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1278         int count, i;
1279         secinfo4 *resok_val;
1280 
1281         /* If this is not an Ok result, nothing to free. */
1282         if (resp->status != NFS4_OK) {
1283                 return;
1284         }
1285 
1286         count = resp->SECINFO4resok_len;
1287         resok_val = resp->SECINFO4resok_val;
1288 
1289         for (i = 0; i < count; i++) {
1290                 if (resok_val[i].flavor == RPCSEC_GSS) {
1291                         rpcsec_gss_info *info;
1292 
1293                         info = &resok_val[i].flavor_info;
1294                         kmem_free(info->oid.sec_oid4_val,
1295                             info->oid.sec_oid4_len);
1296                 }
1297         }
1298         kmem_free(resok_val, count * sizeof (secinfo4));
1299         resp->SECINFO4resok_len = 0;
1300         resp->SECINFO4resok_val = NULL;
1301 }
1302 
1303 /* ARGSUSED */
1304 static void
1305 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1306     struct compound_state *cs)
1307 {
1308         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1309         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1310         int error;
1311         vnode_t *vp;
1312         struct vattr va;
1313         int checkwriteperm;
1314         cred_t *cr = cs->cr;
1315         bslabel_t *clabel, *slabel;
1316         ts_label_t *tslabel;
1317         boolean_t admin_low_client;
1318 
1319         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1320             ACCESS4args *, args);
1321 
1322 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1323         if (cs->access == CS_ACCESS_DENIED) {
1324                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1325                 goto out;
1326         }
1327 #endif
1328         if (cs->vp == NULL) {
1329                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1330                 goto out;
1331         }
1332 
1333         ASSERT(cr != NULL);
1334 
1335         vp = cs->vp;
1336 
1337         /*
1338          * If the file system is exported read only, it is not appropriate
1339          * to check write permissions for regular files and directories.
1340          * Special files are interpreted by the client, so the underlying
1341          * permissions are sent back to the client for interpretation.
1342          */
1343         if (rdonly4(req, cs) &&
1344             (vp->v_type == VREG || vp->v_type == VDIR))
1345                 checkwriteperm = 0;
1346         else
1347                 checkwriteperm = 1;
1348 
1349         /*
1350          * XXX
1351          * We need the mode so that we can correctly determine access
1352          * permissions relative to a mandatory lock file.  Access to
1353          * mandatory lock files is denied on the server, so it might
1354          * as well be reflected to the server during the open.
1355          */
1356         va.va_mask = AT_MODE;
1357         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1358         if (error) {
1359                 *cs->statusp = resp->status = puterrno4(error);
1360                 goto out;
1361         }
1362         resp->access = 0;
1363         resp->supported = 0;
1364 
1365         if (is_system_labeled()) {
1366                 ASSERT(req->rq_label != NULL);
1367                 clabel = req->rq_label;
1368                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1369                     "got client label from request(1)",
1370                     struct svc_req *, req);
1371                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1372                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1373                                 *cs->statusp = resp->status = puterrno4(EACCES);
1374                                 goto out;
1375                         }
1376                         slabel = label2bslabel(tslabel);
1377                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1378                             char *, "got server label(1) for vp(2)",
1379                             bslabel_t *, slabel, vnode_t *, vp);
1380 
1381                         admin_low_client = B_FALSE;
1382                 } else
1383                         admin_low_client = B_TRUE;
1384         }
1385 
1386         if (args->access & ACCESS4_READ) {
1387                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1388                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1389                     (!is_system_labeled() || admin_low_client ||
1390                     bldominates(clabel, slabel)))
1391                         resp->access |= ACCESS4_READ;
1392                 resp->supported |= ACCESS4_READ;
1393         }
1394         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1395                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1396                 if (!error && (!is_system_labeled() || admin_low_client ||
1397                     bldominates(clabel, slabel)))
1398                         resp->access |= ACCESS4_LOOKUP;
1399                 resp->supported |= ACCESS4_LOOKUP;
1400         }
1401         if (checkwriteperm &&
1402             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1403                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1404                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1405                     (!is_system_labeled() || admin_low_client ||
1406                     blequal(clabel, slabel)))
1407                         resp->access |=
1408                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1409                 resp->supported |=
1410                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1411         }
1412 
1413         if (checkwriteperm &&
1414             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1415                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1416                 if (!error && (!is_system_labeled() || admin_low_client ||
1417                     blequal(clabel, slabel)))
1418                         resp->access |= ACCESS4_DELETE;
1419                 resp->supported |= ACCESS4_DELETE;
1420         }
1421         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1422                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1423                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1424                     (!is_system_labeled() || admin_low_client ||
1425                     bldominates(clabel, slabel)))
1426                         resp->access |= ACCESS4_EXECUTE;
1427                 resp->supported |= ACCESS4_EXECUTE;
1428         }
1429 
1430         if (is_system_labeled() && !admin_low_client)
1431                 label_rele(tslabel);
1432 
1433         *cs->statusp = resp->status = NFS4_OK;
1434 out:
1435         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1436             ACCESS4res *, resp);
1437 }
1438 
1439 /* ARGSUSED */
1440 static void
1441 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1442     struct compound_state *cs)
1443 {
1444         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1445         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1446         int error;
1447         vnode_t *vp = cs->vp;
1448         cred_t *cr = cs->cr;
1449         vattr_t va;
1450         nfs4_srv_t *nsrv4;
1451 
1452         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1453             COMMIT4args *, args);
1454 
1455         if (vp == NULL) {
1456                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1457                 goto out;
1458         }
1459         if (cs->access == CS_ACCESS_DENIED) {
1460                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1461                 goto out;
1462         }
1463 
1464         if (args->offset + args->count < args->offset) {
1465                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1466                 goto out;
1467         }
1468 
1469         va.va_mask = AT_UID;
1470         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1471 
1472         /*
1473          * If we can't get the attributes, then we can't do the
1474          * right access checking.  So, we'll fail the request.
1475          */
1476         if (error) {
1477                 *cs->statusp = resp->status = puterrno4(error);
1478                 goto out;
1479         }
1480         if (rdonly4(req, cs)) {
1481                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1482                 goto out;
1483         }
1484 
1485         if (vp->v_type != VREG) {
1486                 if (vp->v_type == VDIR)
1487                         resp->status = NFS4ERR_ISDIR;
1488                 else
1489                         resp->status = NFS4ERR_INVAL;
1490                 *cs->statusp = resp->status;
1491                 goto out;
1492         }
1493 
1494         if (crgetuid(cr) != va.va_uid &&
1495             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1496                 *cs->statusp = resp->status = puterrno4(error);
1497                 goto out;
1498         }
1499 
1500         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1501 
1502         if (error) {
1503                 *cs->statusp = resp->status = puterrno4(error);
1504                 goto out;
1505         }
1506 
1507         nsrv4 = nfs4_get_srv();
1508         *cs->statusp = resp->status = NFS4_OK;
1509         resp->writeverf = nsrv4->write4verf;
1510 out:
1511         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1512             COMMIT4res *, resp);
1513 }
1514 
1515 /*
1516  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1517  * was completed. It does the nfsv4 create for special files.
1518  */
1519 /* ARGSUSED */
1520 static vnode_t *
1521 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1522     struct compound_state *cs, vattr_t *vap, char *nm)
1523 {
1524         int error;
1525         cred_t *cr = cs->cr;
1526         vnode_t *dvp = cs->vp;
1527         vnode_t *vp = NULL;
1528         int mode;
1529         enum vcexcl excl;
1530 
1531         switch (args->type) {
1532         case NF4CHR:
1533         case NF4BLK:
1534                 if (secpolicy_sys_devices(cr) != 0) {
1535                         *cs->statusp = resp->status = NFS4ERR_PERM;
1536                         return (NULL);
1537                 }
1538                 if (args->type == NF4CHR)
1539                         vap->va_type = VCHR;
1540                 else
1541                         vap->va_type = VBLK;
1542                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1543                     args->ftype4_u.devdata.specdata2);
1544                 vap->va_mask |= AT_RDEV;
1545                 break;
1546         case NF4SOCK:
1547                 vap->va_type = VSOCK;
1548                 break;
1549         case NF4FIFO:
1550                 vap->va_type = VFIFO;
1551                 break;
1552         default:
1553                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1554                 return (NULL);
1555         }
1556 
1557         /*
1558          * Must specify the mode.
1559          */
1560         if (!(vap->va_mask & AT_MODE)) {
1561                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1562                 return (NULL);
1563         }
1564 
1565         excl = EXCL;
1566 
1567         mode = 0;
1568 
1569         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1570         if (error) {
1571                 *cs->statusp = resp->status = puterrno4(error);
1572                 return (NULL);
1573         }
1574         return (vp);
1575 }
1576 
1577 /*
1578  * nfsv4 create is used to create non-regular files. For regular files,
1579  * use nfsv4 open.
1580  */
1581 /* ARGSUSED */
1582 static void
1583 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1584     struct compound_state *cs)
1585 {
1586         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1587         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1588         int error;
1589         struct vattr bva, iva, iva2, ava, *vap;
1590         cred_t *cr = cs->cr;
1591         vnode_t *dvp = cs->vp;
1592         vnode_t *vp = NULL;
1593         vnode_t *realvp;
1594         char *nm, *lnm;
1595         uint_t len, llen;
1596         int syncval = 0;
1597         struct nfs4_svgetit_arg sarg;
1598         struct nfs4_ntov_table ntov;
1599         struct statvfs64 sb;
1600         nfsstat4 status;
1601         struct sockaddr *ca;
1602         char *name = NULL;
1603         char *lname = NULL;
1604 
1605         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1606             CREATE4args *, args);
1607 
1608         resp->attrset = 0;
1609 
1610         if (dvp == NULL) {
1611                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1612                 goto out;
1613         }
1614 
1615         /*
1616          * If there is an unshared filesystem mounted on this vnode,
1617          * do not allow to create an object in this directory.
1618          */
1619         if (vn_ismntpt(dvp)) {
1620                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1621                 goto out;
1622         }
1623 
1624         /* Verify that type is correct */
1625         switch (args->type) {
1626         case NF4LNK:
1627         case NF4BLK:
1628         case NF4CHR:
1629         case NF4SOCK:
1630         case NF4FIFO:
1631         case NF4DIR:
1632                 break;
1633         default:
1634                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1635                 goto out;
1636         };
1637 
1638         if (cs->access == CS_ACCESS_DENIED) {
1639                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1640                 goto out;
1641         }
1642         if (dvp->v_type != VDIR) {
1643                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1644                 goto out;
1645         }
1646         status = utf8_dir_verify(&args->objname);
1647         if (status != NFS4_OK) {
1648                 *cs->statusp = resp->status = status;
1649                 goto out;
1650         }
1651 
1652         if (rdonly4(req, cs)) {
1653                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1654                 goto out;
1655         }
1656 
1657         /*
1658          * Name of newly created object
1659          */
1660         nm = utf8_to_fn(&args->objname, &len, NULL);
1661         if (nm == NULL) {
1662                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1663                 goto out;
1664         }
1665 
1666         if (len > MAXNAMELEN) {
1667                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1668                 kmem_free(nm, len);
1669                 goto out;
1670         }
1671 
1672         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1673         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1674             MAXPATHLEN  + 1);
1675 
1676         if (name == NULL) {
1677                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1678                 kmem_free(nm, len);
1679                 goto out;
1680         }
1681 
1682         resp->attrset = 0;
1683 
1684         sarg.sbp = &sb;
1685         sarg.is_referral = B_FALSE;
1686         nfs4_ntov_table_init(&ntov);
1687 
1688         status = do_rfs4_set_attrs(&resp->attrset,
1689             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1690 
1691         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1692                 status = NFS4ERR_INVAL;
1693 
1694         if (status != NFS4_OK) {
1695                 *cs->statusp = resp->status = status;
1696                 if (name != nm)
1697                         kmem_free(name, MAXPATHLEN + 1);
1698                 kmem_free(nm, len);
1699                 nfs4_ntov_table_free(&ntov, &sarg);
1700                 resp->attrset = 0;
1701                 goto out;
1702         }
1703 
1704         /* Get "before" change value */
1705         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1706         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1707         if (error) {
1708                 *cs->statusp = resp->status = puterrno4(error);
1709                 if (name != nm)
1710                         kmem_free(name, MAXPATHLEN + 1);
1711                 kmem_free(nm, len);
1712                 nfs4_ntov_table_free(&ntov, &sarg);
1713                 resp->attrset = 0;
1714                 goto out;
1715         }
1716         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1717 
1718         vap = sarg.vap;
1719 
1720         /*
1721          * Set the default initial values for attributes when the parent
1722          * directory does not have the VSUID/VSGID bit set and they have
1723          * not been specified in createattrs.
1724          */
1725         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1726                 vap->va_uid = crgetuid(cr);
1727                 vap->va_mask |= AT_UID;
1728         }
1729         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1730                 vap->va_gid = crgetgid(cr);
1731                 vap->va_mask |= AT_GID;
1732         }
1733 
1734         vap->va_mask |= AT_TYPE;
1735         switch (args->type) {
1736         case NF4DIR:
1737                 vap->va_type = VDIR;
1738                 if ((vap->va_mask & AT_MODE) == 0) {
1739                         vap->va_mode = 0700; /* default: owner rwx only */
1740                         vap->va_mask |= AT_MODE;
1741                 }
1742                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1743                 if (error)
1744                         break;
1745 
1746                 /*
1747                  * Get the initial "after" sequence number, if it fails,
1748                  * set to zero
1749                  */
1750                 iva.va_mask = AT_SEQ;
1751                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1752                         iva.va_seq = 0;
1753                 break;
1754         case NF4LNK:
1755                 vap->va_type = VLNK;
1756                 if ((vap->va_mask & AT_MODE) == 0) {
1757                         vap->va_mode = 0700; /* default: owner rwx only */
1758                         vap->va_mask |= AT_MODE;
1759                 }
1760 
1761                 /*
1762                  * symlink names must be treated as data
1763                  */
1764                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1765                     &llen, NULL);
1766 
1767                 if (lnm == NULL) {
1768                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1769                         if (name != nm)
1770                                 kmem_free(name, MAXPATHLEN + 1);
1771                         kmem_free(nm, len);
1772                         nfs4_ntov_table_free(&ntov, &sarg);
1773                         resp->attrset = 0;
1774                         goto out;
1775                 }
1776 
1777                 if (llen > MAXPATHLEN) {
1778                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1779                         if (name != nm)
1780                                 kmem_free(name, MAXPATHLEN + 1);
1781                         kmem_free(nm, len);
1782                         kmem_free(lnm, llen);
1783                         nfs4_ntov_table_free(&ntov, &sarg);
1784                         resp->attrset = 0;
1785                         goto out;
1786                 }
1787 
1788                 lname = nfscmd_convname(ca, cs->exi, lnm,
1789                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1790 
1791                 if (lname == NULL) {
1792                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1793                         if (name != nm)
1794                                 kmem_free(name, MAXPATHLEN + 1);
1795                         kmem_free(nm, len);
1796                         kmem_free(lnm, llen);
1797                         nfs4_ntov_table_free(&ntov, &sarg);
1798                         resp->attrset = 0;
1799                         goto out;
1800                 }
1801 
1802                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1803                 if (lname != lnm)
1804                         kmem_free(lname, MAXPATHLEN + 1);
1805                 kmem_free(lnm, llen);
1806                 if (error)
1807                         break;
1808 
1809                 /*
1810                  * Get the initial "after" sequence number, if it fails,
1811                  * set to zero
1812                  */
1813                 iva.va_mask = AT_SEQ;
1814                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1815                         iva.va_seq = 0;
1816 
1817                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1818                     NULL, NULL, NULL);
1819                 if (error)
1820                         break;
1821 
1822                 /*
1823                  * va_seq is not safe over VOP calls, check it again
1824                  * if it has changed zero out iva to force atomic = FALSE.
1825                  */
1826                 iva2.va_mask = AT_SEQ;
1827                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1828                     iva2.va_seq != iva.va_seq)
1829                         iva.va_seq = 0;
1830                 break;
1831         default:
1832                 /*
1833                  * probably a special file.
1834                  */
1835                 if ((vap->va_mask & AT_MODE) == 0) {
1836                         vap->va_mode = 0600; /* default: owner rw only */
1837                         vap->va_mask |= AT_MODE;
1838                 }
1839                 syncval = FNODSYNC;
1840                 /*
1841                  * We know this will only generate one VOP call
1842                  */
1843                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1844 
1845                 if (vp == NULL) {
1846                         if (name != nm)
1847                                 kmem_free(name, MAXPATHLEN + 1);
1848                         kmem_free(nm, len);
1849                         nfs4_ntov_table_free(&ntov, &sarg);
1850                         resp->attrset = 0;
1851                         goto out;
1852                 }
1853 
1854                 /*
1855                  * Get the initial "after" sequence number, if it fails,
1856                  * set to zero
1857                  */
1858                 iva.va_mask = AT_SEQ;
1859                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1860                         iva.va_seq = 0;
1861 
1862                 break;
1863         }
1864         if (name != nm)
1865                 kmem_free(name, MAXPATHLEN + 1);
1866         kmem_free(nm, len);
1867 
1868         if (error) {
1869                 *cs->statusp = resp->status = puterrno4(error);
1870         }
1871 
1872         /*
1873          * Force modified data and metadata out to stable storage.
1874          */
1875         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1876 
1877         if (resp->status != NFS4_OK) {
1878                 if (vp != NULL)
1879                         VN_RELE(vp);
1880                 nfs4_ntov_table_free(&ntov, &sarg);
1881                 resp->attrset = 0;
1882                 goto out;
1883         }
1884 
1885         /*
1886          * Finish setup of cinfo response, "before" value already set.
1887          * Get "after" change value, if it fails, simply return the
1888          * before value.
1889          */
1890         ava.va_mask = AT_CTIME|AT_SEQ;
1891         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1892                 ava.va_ctime = bva.va_ctime;
1893                 ava.va_seq = 0;
1894         }
1895         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1896 
1897         /*
1898          * True verification that object was created with correct
1899          * attrs is impossible.  The attrs could have been changed
1900          * immediately after object creation.  If attributes did
1901          * not verify, the only recourse for the server is to
1902          * destroy the object.  Maybe if some attrs (like gid)
1903          * are set incorrectly, the object should be destroyed;
1904          * however, seems bad as a default policy.  Do we really
1905          * want to destroy an object over one of the times not
1906          * verifying correctly?  For these reasons, the server
1907          * currently sets bits in attrset for createattrs
1908          * that were set; however, no verification is done.
1909          *
1910          * vmask_to_nmask accounts for vattr bits set on create
1911          *      [do_rfs4_set_attrs() only sets resp bits for
1912          *       non-vattr/vfs bits.]
1913          * Mask off any bits set by default so as not to return
1914          * more attrset bits than were requested in createattrs
1915          */
1916         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1917         resp->attrset &= args->createattrs.attrmask;
1918         nfs4_ntov_table_free(&ntov, &sarg);
1919 
1920         error = makefh4(&cs->fh, vp, cs->exi);
1921         if (error) {
1922                 *cs->statusp = resp->status = puterrno4(error);
1923         }
1924 
1925         /*
1926          * The cinfo.atomic = TRUE only if we got no errors, we have
1927          * non-zero va_seq's, and it has incremented by exactly one
1928          * during the creation and it didn't change during the VOP_LOOKUP
1929          * or VOP_FSYNC.
1930          */
1931         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1932             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1933                 resp->cinfo.atomic = TRUE;
1934         else
1935                 resp->cinfo.atomic = FALSE;
1936 
1937         /*
1938          * Force modified metadata out to stable storage.
1939          *
1940          * if a underlying vp exists, pass it to VOP_FSYNC
1941          */
1942         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1943                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1944         else
1945                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1946 
1947         if (resp->status != NFS4_OK) {
1948                 VN_RELE(vp);
1949                 goto out;
1950         }
1951         if (cs->vp)
1952                 VN_RELE(cs->vp);
1953 
1954         cs->vp = vp;
1955         *cs->statusp = resp->status = NFS4_OK;
1956 out:
1957         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1958             CREATE4res *, resp);
1959 }
1960 
1961 /*ARGSUSED*/
1962 static void
1963 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1964     struct compound_state *cs)
1965 {
1966         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1967             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1968 
1969         rfs4_op_inval(argop, resop, req, cs);
1970 
1971         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1972             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1973 }
1974 
1975 /*ARGSUSED*/
1976 static void
1977 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1978     struct compound_state *cs)
1979 {
1980         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1981         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1982         rfs4_deleg_state_t *dsp;
1983         nfsstat4 status;
1984 
1985         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1986             DELEGRETURN4args *, args);
1987 
1988         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1989         resp->status = *cs->statusp = status;
1990         if (status != NFS4_OK)
1991                 goto out;
1992 
1993         /* Ensure specified filehandle matches */
1994         if (cs->vp != dsp->rds_finfo->rf_vp) {
1995                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1996         } else
1997                 rfs4_return_deleg(dsp, FALSE);
1998 
1999         rfs4_update_lease(dsp->rds_client);
2000 
2001         rfs4_deleg_state_rele(dsp);
2002 out:
2003         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2004             DELEGRETURN4res *, resp);
2005 }
2006 
2007 /*
2008  * Check to see if a given "flavor" is an explicitly shared flavor.
2009  * The assumption of this routine is the "flavor" is already a valid
2010  * flavor in the secinfo list of "exi".
2011  *
2012  *      e.g.
2013  *              # share -o sec=flavor1 /export
2014  *              # share -o sec=flavor2 /export/home
2015  *
2016  *              flavor2 is not an explicitly shared flavor for /export,
2017  *              however it is in the secinfo list for /export thru the
2018  *              server namespace setup.
2019  */
2020 int
2021 is_exported_sec(int flavor, struct exportinfo *exi)
2022 {
2023         int     i;
2024         struct secinfo *sp;
2025 
2026         sp = exi->exi_export.ex_secinfo;
2027         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2028                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2029                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2030                         return (SEC_REF_EXPORTED(&sp[i]));
2031                 }
2032         }
2033 
2034         /* Should not reach this point based on the assumption */
2035         return (0);
2036 }
2037 
2038 /*
2039  * Check if the security flavor used in the request matches what is
2040  * required at the export point or at the root pseudo node (exi_root).
2041  *
2042  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2043  *
2044  */
2045 static int
2046 secinfo_match_or_authnone(struct compound_state *cs)
2047 {
2048         int     i;
2049         struct secinfo *sp;
2050 
2051         /*
2052          * Check cs->nfsflavor (from the request) against
2053          * the current export data in cs->exi.
2054          */
2055         sp = cs->exi->exi_export.ex_secinfo;
2056         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2057                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2058                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2059                         return (1);
2060         }
2061 
2062         return (0);
2063 }
2064 
2065 /*
2066  * Check the access authority for the client and return the correct error.
2067  */
2068 nfsstat4
2069 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2070 {
2071         int     authres;
2072 
2073         /*
2074          * First, check if the security flavor used in the request
2075          * are among the flavors set in the server namespace.
2076          */
2077         if (!secinfo_match_or_authnone(cs)) {
2078                 *cs->statusp = NFS4ERR_WRONGSEC;
2079                 return (*cs->statusp);
2080         }
2081 
2082         authres = checkauth4(cs, req);
2083 
2084         if (authres > 0) {
2085                 *cs->statusp = NFS4_OK;
2086                 if (! (cs->access & CS_ACCESS_LIMITED))
2087                         cs->access = CS_ACCESS_OK;
2088         } else if (authres == 0) {
2089                 *cs->statusp = NFS4ERR_ACCESS;
2090         } else if (authres == -2) {
2091                 *cs->statusp = NFS4ERR_WRONGSEC;
2092         } else {
2093                 *cs->statusp = NFS4ERR_DELAY;
2094         }
2095         return (*cs->statusp);
2096 }
2097 
2098 /*
2099  * bitmap4_to_attrmask is called by getattr and readdir.
2100  * It sets up the vattr mask and determines whether vfsstat call is needed
2101  * based on the input bitmap.
2102  * Returns nfsv4 status.
2103  */
2104 static nfsstat4
2105 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2106 {
2107         int i;
2108         uint_t  va_mask;
2109         struct statvfs64 *sbp = sargp->sbp;
2110 
2111         sargp->sbp = NULL;
2112         sargp->flag = 0;
2113         sargp->rdattr_error = NFS4_OK;
2114         sargp->mntdfid_set = FALSE;
2115         if (sargp->cs->vp)
2116                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2117                     FH4_ATTRDIR | FH4_NAMEDATTR);
2118         else
2119                 sargp->xattr = 0;
2120 
2121         /*
2122          * Set rdattr_error_req to true if return error per
2123          * failed entry rather than fail the readdir.
2124          */
2125         if (breq & FATTR4_RDATTR_ERROR_MASK)
2126                 sargp->rdattr_error_req = 1;
2127         else
2128                 sargp->rdattr_error_req = 0;
2129 
2130         /*
2131          * generate the va_mask
2132          * Handle the easy cases first
2133          */
2134         switch (breq) {
2135         case NFS4_NTOV_ATTR_MASK:
2136                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2137                 return (NFS4_OK);
2138 
2139         case NFS4_FS_ATTR_MASK:
2140                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2141                 sargp->sbp = sbp;
2142                 return (NFS4_OK);
2143 
2144         case NFS4_NTOV_ATTR_CACHE_MASK:
2145                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2146                 return (NFS4_OK);
2147 
2148         case FATTR4_LEASE_TIME_MASK:
2149                 sargp->vap->va_mask = 0;
2150                 return (NFS4_OK);
2151 
2152         default:
2153                 va_mask = 0;
2154                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2155                         if ((breq & nfs4_ntov_map[i].fbit) &&
2156                             nfs4_ntov_map[i].vbit)
2157                                 va_mask |= nfs4_ntov_map[i].vbit;
2158                 }
2159 
2160                 /*
2161                  * Check is vfsstat is needed
2162                  */
2163                 if (breq & NFS4_FS_ATTR_MASK)
2164                         sargp->sbp = sbp;
2165 
2166                 sargp->vap->va_mask = va_mask;
2167                 return (NFS4_OK);
2168         }
2169         /* NOTREACHED */
2170 }
2171 
2172 /*
2173  * bitmap4_get_sysattrs is called by getattr and readdir.
2174  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2175  * Returns nfsv4 status.
2176  */
2177 static nfsstat4
2178 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2179 {
2180         int error;
2181         struct compound_state *cs = sargp->cs;
2182         vnode_t *vp = cs->vp;
2183 
2184         if (sargp->sbp != NULL) {
2185                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2186                         sargp->sbp = NULL;   /* to identify error */
2187                         return (puterrno4(error));
2188                 }
2189         }
2190 
2191         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2192 }
2193 
2194 static void
2195 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2196 {
2197         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2198             KM_SLEEP);
2199         ntovp->attrcnt = 0;
2200         ntovp->vfsstat = FALSE;
2201 }
2202 
2203 static void
2204 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2205     struct nfs4_svgetit_arg *sargp)
2206 {
2207         int i;
2208         union nfs4_attr_u *na;
2209         uint8_t *amap;
2210 
2211         /*
2212          * XXX Should do the same checks for whether the bit is set
2213          */
2214         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2215             i < ntovp->attrcnt; i++, na++, amap++) {
2216                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2217                     NFS4ATTR_FREEIT, sargp, na);
2218         }
2219         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2220                 /*
2221                  * xdr_free for getattr will be done later
2222                  */
2223                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2224                     i < ntovp->attrcnt; i++, na++, amap++) {
2225                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2226                 }
2227         }
2228         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2229 }
2230 
2231 /*
2232  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2233  */
2234 static nfsstat4
2235 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2236     struct nfs4_svgetit_arg *sargp)
2237 {
2238         int error = 0;
2239         int i, k;
2240         struct nfs4_ntov_table ntov;
2241         XDR xdr;
2242         ulong_t xdr_size;
2243         char *xdr_attrs;
2244         nfsstat4 status = NFS4_OK;
2245         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2246         union nfs4_attr_u *na;
2247         uint8_t *amap;
2248 
2249         sargp->op = NFS4ATTR_GETIT;
2250         sargp->flag = 0;
2251 
2252         fattrp->attrmask = 0;
2253         /* if no bits requested, then return empty fattr4 */
2254         if (breq == 0) {
2255                 fattrp->attrlist4_len = 0;
2256                 fattrp->attrlist4 = NULL;
2257                 return (NFS4_OK);
2258         }
2259 
2260         /*
2261          * return NFS4ERR_INVAL when client requests write-only attrs
2262          */
2263         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2264                 return (NFS4ERR_INVAL);
2265 
2266         nfs4_ntov_table_init(&ntov);
2267         na = ntov.na;
2268         amap = ntov.amap;
2269 
2270         /*
2271          * Now loop to get or verify the attrs
2272          */
2273         for (i = 0; i < nfs4_ntov_map_size; i++) {
2274                 if (breq & nfs4_ntov_map[i].fbit) {
2275                         if ((*nfs4_ntov_map[i].sv_getit)(
2276                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2277 
2278                                 error = (*nfs4_ntov_map[i].sv_getit)(
2279                                     NFS4ATTR_GETIT, sargp, na);
2280 
2281                                 /*
2282                                  * Possible error values:
2283                                  * >0 if sv_getit failed to
2284                                  * get the attr; 0 if succeeded;
2285                                  * <0 if rdattr_error and the
2286                                  * attribute cannot be returned.
2287                                  */
2288                                 if (error && !(sargp->rdattr_error_req))
2289                                         goto done;
2290                                 /*
2291                                  * If error then just for entry
2292                                  */
2293                                 if (error == 0) {
2294                                         fattrp->attrmask |=
2295                                             nfs4_ntov_map[i].fbit;
2296                                         *amap++ =
2297                                             (uint8_t)nfs4_ntov_map[i].nval;
2298                                         na++;
2299                                         (ntov.attrcnt)++;
2300                                 } else if ((error > 0) &&
2301                                     (sargp->rdattr_error == NFS4_OK)) {
2302                                         sargp->rdattr_error = puterrno4(error);
2303                                 }
2304                                 error = 0;
2305                         }
2306                 }
2307         }
2308 
2309         /*
2310          * If rdattr_error was set after the return value for it was assigned,
2311          * update it.
2312          */
2313         if (prev_rdattr_error != sargp->rdattr_error) {
2314                 na = ntov.na;
2315                 amap = ntov.amap;
2316                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2317                         k = *amap;
2318                         if (k < FATTR4_RDATTR_ERROR) {
2319                                 continue;
2320                         }
2321                         if ((k == FATTR4_RDATTR_ERROR) &&
2322                             ((*nfs4_ntov_map[k].sv_getit)(
2323                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2324 
2325                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2326                                     NFS4ATTR_GETIT, sargp, na);
2327                         }
2328                         break;
2329                 }
2330         }
2331 
2332         xdr_size = 0;
2333         na = ntov.na;
2334         amap = ntov.amap;
2335         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2336                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2337         }
2338 
2339         fattrp->attrlist4_len = xdr_size;
2340         if (xdr_size) {
2341                 /* freed by rfs4_op_getattr_free() */
2342                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2343 
2344                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2345 
2346                 na = ntov.na;
2347                 amap = ntov.amap;
2348                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2349                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2350                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2351                                     int, *amap);
2352                                 status = NFS4ERR_SERVERFAULT;
2353                                 break;
2354                         }
2355                 }
2356                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2357         } else {
2358                 fattrp->attrlist4 = NULL;
2359         }
2360 done:
2361 
2362         nfs4_ntov_table_free(&ntov, sargp);
2363 
2364         if (error != 0)
2365                 status = puterrno4(error);
2366 
2367         return (status);
2368 }
2369 
2370 /* ARGSUSED */
2371 static void
2372 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2373     struct compound_state *cs)
2374 {
2375         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2376         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2377         struct nfs4_svgetit_arg sarg;
2378         struct statvfs64 sb;
2379         nfsstat4 status;
2380 
2381         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2382             GETATTR4args *, args);
2383 
2384         if (cs->vp == NULL) {
2385                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2386                 goto out;
2387         }
2388 
2389         if (cs->access == CS_ACCESS_DENIED) {
2390                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2391                 goto out;
2392         }
2393 
2394         sarg.sbp = &sb;
2395         sarg.cs = cs;
2396         sarg.is_referral = B_FALSE;
2397 
2398         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2399         if (status == NFS4_OK) {
2400 
2401                 status = bitmap4_get_sysattrs(&sarg);
2402                 if (status == NFS4_OK) {
2403 
2404                         /* Is this a referral? */
2405                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2406                                 /* Older V4 Solaris client sees a link */
2407                                 if (client_is_downrev(req))
2408                                         sarg.vap->va_type = VLNK;
2409                                 else
2410                                         sarg.is_referral = B_TRUE;
2411                         }
2412 
2413                         status = do_rfs4_op_getattr(args->attr_request,
2414                             &resp->obj_attributes, &sarg);
2415                 }
2416         }
2417         *cs->statusp = resp->status = status;
2418 out:
2419         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2420             GETATTR4res *, resp);
2421 }
2422 
2423 static void
2424 rfs4_op_getattr_free(nfs_resop4 *resop)
2425 {
2426         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2427 
2428         nfs4_fattr4_free(&resp->obj_attributes);
2429 }
2430 
2431 /* ARGSUSED */
2432 static void
2433 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2434     struct compound_state *cs)
2435 {
2436         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2437 
2438         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2439 
2440         if (cs->vp == NULL) {
2441                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2442                 goto out;
2443         }
2444         if (cs->access == CS_ACCESS_DENIED) {
2445                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2446                 goto out;
2447         }
2448 
2449         /* check for reparse point at the share point */
2450         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2451                 /* it's all bad */
2452                 cs->exi->exi_moved = 1;
2453                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2454                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2455                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2456                 return;
2457         }
2458 
2459         /* check for reparse point at vp */
2460         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2461                 /* it's not all bad */
2462                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2463                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2464                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2465                 return;
2466         }
2467 
2468         resp->object.nfs_fh4_val =
2469             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2470         nfs_fh4_copy(&cs->fh, &resp->object);
2471         *cs->statusp = resp->status = NFS4_OK;
2472 out:
2473         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2474             GETFH4res *, resp);
2475 }
2476 
2477 static void
2478 rfs4_op_getfh_free(nfs_resop4 *resop)
2479 {
2480         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2481 
2482         if (resp->status == NFS4_OK &&
2483             resp->object.nfs_fh4_val != NULL) {
2484                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2485                 resp->object.nfs_fh4_val = NULL;
2486                 resp->object.nfs_fh4_len = 0;
2487         }
2488 }
2489 
2490 /*
2491  * illegal: args: void
2492  *          res : status (NFS4ERR_OP_ILLEGAL)
2493  */
2494 /* ARGSUSED */
2495 static void
2496 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2497     struct svc_req *req, struct compound_state *cs)
2498 {
2499         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2500 
2501         resop->resop = OP_ILLEGAL;
2502         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2503 }
2504 
2505 /*
2506  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2507  *       res: status. If success - CURRENT_FH unchanged, return change_info
2508  */
2509 /* ARGSUSED */
2510 static void
2511 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2512     struct compound_state *cs)
2513 {
2514         LINK4args *args = &argop->nfs_argop4_u.oplink;
2515         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2516         int error;
2517         vnode_t *vp;
2518         vnode_t *dvp;
2519         struct vattr bdva, idva, adva;
2520         char *nm;
2521         uint_t  len;
2522         struct sockaddr *ca;
2523         char *name = NULL;
2524         nfsstat4 status;
2525 
2526         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2527             LINK4args *, args);
2528 
2529         /* SAVED_FH: source object */
2530         vp = cs->saved_vp;
2531         if (vp == NULL) {
2532                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2533                 goto out;
2534         }
2535 
2536         /* CURRENT_FH: target directory */
2537         dvp = cs->vp;
2538         if (dvp == NULL) {
2539                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2540                 goto out;
2541         }
2542 
2543         /*
2544          * If there is a non-shared filesystem mounted on this vnode,
2545          * do not allow to link any file in this directory.
2546          */
2547         if (vn_ismntpt(dvp)) {
2548                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2549                 goto out;
2550         }
2551 
2552         if (cs->access == CS_ACCESS_DENIED) {
2553                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2554                 goto out;
2555         }
2556 
2557         /* Check source object's type validity */
2558         if (vp->v_type == VDIR) {
2559                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2560                 goto out;
2561         }
2562 
2563         /* Check target directory's type */
2564         if (dvp->v_type != VDIR) {
2565                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2566                 goto out;
2567         }
2568 
2569         if (cs->saved_exi != cs->exi) {
2570                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2571                 goto out;
2572         }
2573 
2574         status = utf8_dir_verify(&args->newname);
2575         if (status != NFS4_OK) {
2576                 *cs->statusp = resp->status = status;
2577                 goto out;
2578         }
2579 
2580         nm = utf8_to_fn(&args->newname, &len, NULL);
2581         if (nm == NULL) {
2582                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2583                 goto out;
2584         }
2585 
2586         if (len > MAXNAMELEN) {
2587                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2588                 kmem_free(nm, len);
2589                 goto out;
2590         }
2591 
2592         if (rdonly4(req, cs)) {
2593                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2594                 kmem_free(nm, len);
2595                 goto out;
2596         }
2597 
2598         /* Get "before" change value */
2599         bdva.va_mask = AT_CTIME|AT_SEQ;
2600         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2601         if (error) {
2602                 *cs->statusp = resp->status = puterrno4(error);
2603                 kmem_free(nm, len);
2604                 goto out;
2605         }
2606 
2607         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2608         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2609             MAXPATHLEN  + 1);
2610 
2611         if (name == NULL) {
2612                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2613                 kmem_free(nm, len);
2614                 goto out;
2615         }
2616 
2617         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2618 
2619         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2620 
2621         if (nm != name)
2622                 kmem_free(name, MAXPATHLEN + 1);
2623         kmem_free(nm, len);
2624 
2625         /*
2626          * Get the initial "after" sequence number, if it fails, set to zero
2627          */
2628         idva.va_mask = AT_SEQ;
2629         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2630                 idva.va_seq = 0;
2631 
2632         /*
2633          * Force modified data and metadata out to stable storage.
2634          */
2635         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2636         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2637 
2638         if (error) {
2639                 *cs->statusp = resp->status = puterrno4(error);
2640                 goto out;
2641         }
2642 
2643         /*
2644          * Get "after" change value, if it fails, simply return the
2645          * before value.
2646          */
2647         adva.va_mask = AT_CTIME|AT_SEQ;
2648         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2649                 adva.va_ctime = bdva.va_ctime;
2650                 adva.va_seq = 0;
2651         }
2652 
2653         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2654 
2655         /*
2656          * The cinfo.atomic = TRUE only if we have
2657          * non-zero va_seq's, and it has incremented by exactly one
2658          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2659          */
2660         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2661             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2662                 resp->cinfo.atomic = TRUE;
2663         else
2664                 resp->cinfo.atomic = FALSE;
2665 
2666         *cs->statusp = resp->status = NFS4_OK;
2667 out:
2668         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2669             LINK4res *, resp);
2670 }
2671 
2672 /*
2673  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2674  */
2675 
2676 /* ARGSUSED */
2677 static nfsstat4
2678 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2679 {
2680         int error;
2681         int different_export = 0;
2682         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2683         struct exportinfo *exi = NULL, *pre_exi = NULL;
2684         nfsstat4 stat;
2685         fid_t fid;
2686         int attrdir, dotdot, walk;
2687         bool_t is_newvp = FALSE;
2688 
2689         if (cs->vp->v_flag & V_XATTRDIR) {
2690                 attrdir = 1;
2691                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2692         } else {
2693                 attrdir = 0;
2694                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2695         }
2696 
2697         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2698 
2699         /*
2700          * If dotdotting, then need to check whether it's
2701          * above the root of a filesystem, or above an
2702          * export point.
2703          */
2704         if (dotdot) {
2705 
2706                 /*
2707                  * If dotdotting at the root of a filesystem, then
2708                  * need to traverse back to the mounted-on filesystem
2709                  * and do the dotdot lookup there.
2710                  */
2711                 if (cs->vp->v_flag & VROOT) {
2712 
2713                         /*
2714                          * If at the system root, then can
2715                          * go up no further.
2716                          */
2717                         if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2718                                 return (puterrno4(ENOENT));
2719 
2720                         /*
2721                          * Traverse back to the mounted-on filesystem
2722                          */
2723                         cs->vp = untraverse(cs->vp);
2724 
2725                         /*
2726                          * Set the different_export flag so we remember
2727                          * to pick up a new exportinfo entry for
2728                          * this new filesystem.
2729                          */
2730                         different_export = 1;
2731                 } else {
2732 
2733                         /*
2734                          * If dotdotting above an export point then set
2735                          * the different_export to get new export info.
2736                          */
2737                         different_export = nfs_exported(cs->exi, cs->vp);
2738                 }
2739         }
2740 
2741         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2742             NULL, NULL, NULL);
2743         if (error)
2744                 return (puterrno4(error));
2745 
2746         /*
2747          * If the vnode is in a pseudo filesystem, check whether it is visible.
2748          *
2749          * XXX if the vnode is a symlink and it is not visible in
2750          * a pseudo filesystem, return ENOENT (not following symlink).
2751          * V4 client can not mount such symlink. This is a regression
2752          * from V2/V3.
2753          *
2754          * In the same exported filesystem, if the security flavor used
2755          * is not an explicitly shared flavor, limit the view to the visible
2756          * list entries only. This is not a WRONGSEC case because it's already
2757          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2758          */
2759         if (!different_export &&
2760             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2761             cs->access & CS_ACCESS_LIMITED)) {
2762                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2763                         VN_RELE(vp);
2764                         return (puterrno4(ENOENT));
2765                 }
2766         }
2767 
2768         /*
2769          * If it's a mountpoint, then traverse it.
2770          */
2771         if (vn_ismntpt(vp)) {
2772                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2773                 pre_tvp = vp;           /* save pre-traversed vnode     */
2774 
2775                 /*
2776                  * hold pre_tvp to counteract rele by traverse.  We will
2777                  * need pre_tvp below if checkexport4 fails
2778                  */
2779                 VN_HOLD(pre_tvp);
2780                 if ((error = traverse(&vp)) != 0) {
2781                         VN_RELE(vp);
2782                         VN_RELE(pre_tvp);
2783                         return (puterrno4(error));
2784                 }
2785                 different_export = 1;
2786         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2787                 /*
2788                  * The vfsp comparison is to handle the case where
2789                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2790                  * and NFS is unaware of local fs transistions because
2791                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2792                  * the dir and the obj returned by lookup will have different
2793                  * vfs ptrs.
2794                  */
2795                 different_export = 1;
2796         }
2797 
2798         if (different_export) {
2799 
2800                 bzero(&fid, sizeof (fid));
2801                 fid.fid_len = MAXFIDSZ;
2802                 error = vop_fid_pseudo(vp, &fid);
2803                 if (error) {
2804                         VN_RELE(vp);
2805                         if (pre_tvp)
2806                                 VN_RELE(pre_tvp);
2807                         return (puterrno4(error));
2808                 }
2809 
2810                 if (dotdot)
2811                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2812                 else
2813                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2814 
2815                 if (exi == NULL) {
2816                         if (pre_tvp) {
2817                                 /*
2818                                  * If this vnode is a mounted-on vnode,
2819                                  * but the mounted-on file system is not
2820                                  * exported, send back the filehandle for
2821                                  * the mounted-on vnode, not the root of
2822                                  * the mounted-on file system.
2823                                  */
2824                                 VN_RELE(vp);
2825                                 vp = pre_tvp;
2826                                 exi = pre_exi;
2827                         } else {
2828                                 VN_RELE(vp);
2829                                 return (puterrno4(EACCES));
2830                         }
2831                 } else if (pre_tvp) {
2832                         /* we're done with pre_tvp now. release extra hold */
2833                         VN_RELE(pre_tvp);
2834                 }
2835 
2836                 cs->exi = exi;
2837 
2838                 /*
2839                  * Now we do a checkauth4. The reason is that
2840                  * this client/user may not have access to the new
2841                  * exported file system, and if they do,
2842                  * the client/user may be mapped to a different uid.
2843                  *
2844                  * We start with a new cr, because the checkauth4 done
2845                  * in the PUT*FH operation over wrote the cred's uid,
2846                  * gid, etc, and we want the real thing before calling
2847                  * checkauth4()
2848                  */
2849                 crfree(cs->cr);
2850                 cs->cr = crdup(cs->basecr);
2851 
2852                 oldvp = cs->vp;
2853                 cs->vp = vp;
2854                 is_newvp = TRUE;
2855 
2856                 stat = call_checkauth4(cs, req);
2857                 if (stat != NFS4_OK) {
2858                         VN_RELE(cs->vp);
2859                         cs->vp = oldvp;
2860                         return (stat);
2861                 }
2862         }
2863 
2864         /*
2865          * After various NFS checks, do a label check on the path
2866          * component. The label on this path should either be the
2867          * global zone's label or a zone's label. We are only
2868          * interested in the zone's label because exported files
2869          * in global zone is accessible (though read-only) to
2870          * clients. The exportability/visibility check is already
2871          * done before reaching this code.
2872          */
2873         if (is_system_labeled()) {
2874                 bslabel_t *clabel;
2875 
2876                 ASSERT(req->rq_label != NULL);
2877                 clabel = req->rq_label;
2878                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2879                     "got client label from request(1)", struct svc_req *, req);
2880 
2881                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2882                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2883                             cs->exi)) {
2884                                 error = EACCES;
2885                                 goto err_out;
2886                         }
2887                 } else {
2888                         /*
2889                          * We grant access to admin_low label clients
2890                          * only if the client is trusted, i.e. also
2891                          * running Solaris Trusted Extension.
2892                          */
2893                         struct sockaddr *ca;
2894                         int             addr_type;
2895                         void            *ipaddr;
2896                         tsol_tpc_t      *tp;
2897 
2898                         ca = (struct sockaddr *)svc_getrpccaller(
2899                             req->rq_xprt)->buf;
2900                         if (ca->sa_family == AF_INET) {
2901                                 addr_type = IPV4_VERSION;
2902                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2903                         } else if (ca->sa_family == AF_INET6) {
2904                                 addr_type = IPV6_VERSION;
2905                                 ipaddr = &((struct sockaddr_in6 *)
2906                                     ca)->sin6_addr;
2907                         }
2908                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2909                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2910                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2911                             SUN_CIPSO) {
2912                                 if (tp != NULL)
2913                                         TPC_RELE(tp);
2914                                 error = EACCES;
2915                                 goto err_out;
2916                         }
2917                         TPC_RELE(tp);
2918                 }
2919         }
2920 
2921         error = makefh4(&cs->fh, vp, cs->exi);
2922 
2923 err_out:
2924         if (error) {
2925                 if (is_newvp) {
2926                         VN_RELE(cs->vp);
2927                         cs->vp = oldvp;
2928                 } else
2929                         VN_RELE(vp);
2930                 return (puterrno4(error));
2931         }
2932 
2933         if (!is_newvp) {
2934                 if (cs->vp)
2935                         VN_RELE(cs->vp);
2936                 cs->vp = vp;
2937         } else if (oldvp)
2938                 VN_RELE(oldvp);
2939 
2940         /*
2941          * if did lookup on attrdir and didn't lookup .., set named
2942          * attr fh flag
2943          */
2944         if (attrdir && ! dotdot)
2945                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2946 
2947         /* Assume false for now, open proc will set this */
2948         cs->mandlock = FALSE;
2949 
2950         return (NFS4_OK);
2951 }
2952 
2953 /* ARGSUSED */
2954 static void
2955 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2956     struct compound_state *cs)
2957 {
2958         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2959         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2960         char *nm;
2961         uint_t len;
2962         struct sockaddr *ca;
2963         char *name = NULL;
2964         nfsstat4 status;
2965 
2966         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2967             LOOKUP4args *, args);
2968 
2969         if (cs->vp == NULL) {
2970                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2971                 goto out;
2972         }
2973 
2974         if (cs->vp->v_type == VLNK) {
2975                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2976                 goto out;
2977         }
2978 
2979         if (cs->vp->v_type != VDIR) {
2980                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2981                 goto out;
2982         }
2983 
2984         status = utf8_dir_verify(&args->objname);
2985         if (status != NFS4_OK) {
2986                 *cs->statusp = resp->status = status;
2987                 goto out;
2988         }
2989 
2990         nm = utf8_to_str(&args->objname, &len, NULL);
2991         if (nm == NULL) {
2992                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2993                 goto out;
2994         }
2995 
2996         if (len > MAXNAMELEN) {
2997                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2998                 kmem_free(nm, len);
2999                 goto out;
3000         }
3001 
3002         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3003         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3004             MAXPATHLEN  + 1);
3005 
3006         if (name == NULL) {
3007                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3008                 kmem_free(nm, len);
3009                 goto out;
3010         }
3011 
3012         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3013 
3014         if (name != nm)
3015                 kmem_free(name, MAXPATHLEN + 1);
3016         kmem_free(nm, len);
3017 
3018 out:
3019         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3020             LOOKUP4res *, resp);
3021 }
3022 
3023 /* ARGSUSED */
3024 static void
3025 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3026     struct compound_state *cs)
3027 {
3028         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3029 
3030         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3031 
3032         if (cs->vp == NULL) {
3033                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3034                 goto out;
3035         }
3036 
3037         if (cs->vp->v_type != VDIR) {
3038                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3039                 goto out;
3040         }
3041 
3042         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3043 
3044         /*
3045          * From NFSV4 Specification, LOOKUPP should not check for
3046          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3047          */
3048         if (resp->status == NFS4ERR_WRONGSEC) {
3049                 *cs->statusp = resp->status = NFS4_OK;
3050         }
3051 
3052 out:
3053         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3054             LOOKUPP4res *, resp);
3055 }
3056 
3057 
3058 /*ARGSUSED2*/
3059 static void
3060 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3061     struct compound_state *cs)
3062 {
3063         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3064         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3065         vnode_t         *avp = NULL;
3066         int             lookup_flags = LOOKUP_XATTR, error;
3067         int             exp_ro = 0;
3068 
3069         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3070             OPENATTR4args *, args);
3071 
3072         if (cs->vp == NULL) {
3073                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3074                 goto out;
3075         }
3076 
3077         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3078             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3079                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3080                 goto out;
3081         }
3082 
3083         /*
3084          * If file system supports passing ACE mask to VOP_ACCESS then
3085          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3086          */
3087 
3088         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3089                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3090                     V_ACE_MASK, cs->cr, NULL);
3091         else
3092                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3093                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3094                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3095 
3096         if (error) {
3097                 *cs->statusp = resp->status = puterrno4(EACCES);
3098                 goto out;
3099         }
3100 
3101         /*
3102          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3103          * the file system is exported read-only -- regardless of
3104          * createdir flag.  Otherwise the attrdir would be created
3105          * (assuming server fs isn't mounted readonly locally).  If
3106          * VOP_LOOKUP returns ENOENT in this case, the error will
3107          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3108          * because specfs has no VOP_LOOKUP op, so the macro would
3109          * return ENOSYS.  EINVAL is returned by all (current)
3110          * Solaris file system implementations when any of their
3111          * restrictions are violated (xattr(dir) can't have xattrdir).
3112          * Returning NOTSUPP is more appropriate in this case
3113          * because the object will never be able to have an attrdir.
3114          */
3115         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3116                 lookup_flags |= CREATE_XATTR_DIR;
3117 
3118         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3119             NULL, NULL, NULL);
3120 
3121         if (error) {
3122                 if (error == ENOENT && args->createdir && exp_ro)
3123                         *cs->statusp = resp->status = puterrno4(EROFS);
3124                 else if (error == EINVAL || error == ENOSYS)
3125                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3126                 else
3127                         *cs->statusp = resp->status = puterrno4(error);
3128                 goto out;
3129         }
3130 
3131         ASSERT(avp->v_flag & V_XATTRDIR);
3132 
3133         error = makefh4(&cs->fh, avp, cs->exi);
3134 
3135         if (error) {
3136                 VN_RELE(avp);
3137                 *cs->statusp = resp->status = puterrno4(error);
3138                 goto out;
3139         }
3140 
3141         VN_RELE(cs->vp);
3142         cs->vp = avp;
3143 
3144         /*
3145          * There is no requirement for an attrdir fh flag
3146          * because the attrdir has a vnode flag to distinguish
3147          * it from regular (non-xattr) directories.  The
3148          * FH4_ATTRDIR flag is set for future sanity checks.
3149          */
3150         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3151         *cs->statusp = resp->status = NFS4_OK;
3152 
3153 out:
3154         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3155             OPENATTR4res *, resp);
3156 }
3157 
3158 static int
3159 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3160     caller_context_t *ct)
3161 {
3162         int error;
3163         int i;
3164         clock_t delaytime;
3165 
3166         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3167 
3168         /*
3169          * Don't block on mandatory locks. If this routine returns
3170          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3171          */
3172         uio->uio_fmode = FNONBLOCK;
3173 
3174         for (i = 0; i < rfs4_maxlock_tries; i++) {
3175 
3176 
3177                 if (direction == FREAD) {
3178                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3179                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3180                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3181                 } else {
3182                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3183                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3184                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3185                 }
3186 
3187                 if (error != EAGAIN)
3188                         break;
3189 
3190                 if (i < rfs4_maxlock_tries - 1) {
3191                         delay(delaytime);
3192                         delaytime *= 2;
3193                 }
3194         }
3195 
3196         return (error);
3197 }
3198 
3199 /* ARGSUSED */
3200 static void
3201 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3202     struct compound_state *cs)
3203 {
3204         READ4args *args = &argop->nfs_argop4_u.opread;
3205         READ4res *resp = &resop->nfs_resop4_u.opread;
3206         int error;
3207         int verror;
3208         vnode_t *vp;
3209         struct vattr va;
3210         struct iovec iov, *iovp = NULL;
3211         int iovcnt;
3212         struct uio uio;
3213         u_offset_t offset;
3214         bool_t *deleg = &cs->deleg;
3215         nfsstat4 stat;
3216         int in_crit = 0;
3217         mblk_t *mp = NULL;
3218         int alloc_err = 0;
3219         int rdma_used = 0;
3220         int loaned_buffers;
3221         caller_context_t ct;
3222         struct uio *uiop;
3223 
3224         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3225             READ4args, args);
3226 
3227         vp = cs->vp;
3228         if (vp == NULL) {
3229                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3230                 goto out;
3231         }
3232         if (cs->access == CS_ACCESS_DENIED) {
3233                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3234                 goto out;
3235         }
3236 
3237         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3238             deleg, TRUE, &ct)) != NFS4_OK) {
3239                 *cs->statusp = resp->status = stat;
3240                 goto out;
3241         }
3242 
3243         /*
3244          * Enter the critical region before calling VOP_RWLOCK
3245          * to avoid a deadlock with write requests.
3246          */
3247         if (nbl_need_check(vp)) {
3248                 nbl_start_crit(vp, RW_READER);
3249                 in_crit = 1;
3250                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3251                     &ct)) {
3252                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3253                         goto out;
3254                 }
3255         }
3256 
3257         if (args->wlist) {
3258                 if (args->count > clist_len(args->wlist)) {
3259                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3260                         goto out;
3261                 }
3262                 rdma_used = 1;
3263         }
3264 
3265         /* use loaned buffers for TCP */
3266         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3267 
3268         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3269         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3270 
3271         /*
3272          * If we can't get the attributes, then we can't do the
3273          * right access checking.  So, we'll fail the request.
3274          */
3275         if (verror) {
3276                 *cs->statusp = resp->status = puterrno4(verror);
3277                 goto out;
3278         }
3279 
3280         if (vp->v_type != VREG) {
3281                 *cs->statusp = resp->status =
3282                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3283                 goto out;
3284         }
3285 
3286         if (crgetuid(cs->cr) != va.va_uid &&
3287             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3288             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3289                 *cs->statusp = resp->status = puterrno4(error);
3290                 goto out;
3291         }
3292 
3293         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3294                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3295                 goto out;
3296         }
3297 
3298         offset = args->offset;
3299         if (offset >= va.va_size) {
3300                 *cs->statusp = resp->status = NFS4_OK;
3301                 resp->eof = TRUE;
3302                 resp->data_len = 0;
3303                 resp->data_val = NULL;
3304                 resp->mblk = NULL;
3305                 /* RDMA */
3306                 resp->wlist = args->wlist;
3307                 resp->wlist_len = resp->data_len;
3308                 *cs->statusp = resp->status = NFS4_OK;
3309                 if (resp->wlist)
3310                         clist_zero_len(resp->wlist);
3311                 goto out;
3312         }
3313 
3314         if (args->count == 0) {
3315                 *cs->statusp = resp->status = NFS4_OK;
3316                 resp->eof = FALSE;
3317                 resp->data_len = 0;
3318                 resp->data_val = NULL;
3319                 resp->mblk = NULL;
3320                 /* RDMA */
3321                 resp->wlist = args->wlist;
3322                 resp->wlist_len = resp->data_len;
3323                 if (resp->wlist)
3324                         clist_zero_len(resp->wlist);
3325                 goto out;
3326         }
3327 
3328         /*
3329          * Do not allocate memory more than maximum allowed
3330          * transfer size
3331          */
3332         if (args->count > rfs4_tsize(req))
3333                 args->count = rfs4_tsize(req);
3334 
3335         if (loaned_buffers) {
3336                 uiop = (uio_t *)rfs_setup_xuio(vp);
3337                 ASSERT(uiop != NULL);
3338                 uiop->uio_segflg = UIO_SYSSPACE;
3339                 uiop->uio_loffset = args->offset;
3340                 uiop->uio_resid = args->count;
3341 
3342                 /* Jump to do the read if successful */
3343                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3344                         /*
3345                          * Need to hold the vnode until after VOP_RETZCBUF()
3346                          * is called.
3347                          */
3348                         VN_HOLD(vp);
3349                         goto doio_read;
3350                 }
3351 
3352                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3353                     uiop->uio_loffset, int, uiop->uio_resid);
3354 
3355                 uiop->uio_extflg = 0;
3356 
3357                 /* failure to setup for zero copy */
3358                 rfs_free_xuio((void *)uiop);
3359                 loaned_buffers = 0;
3360         }
3361 
3362         /*
3363          * If returning data via RDMA Write, then grab the chunk list. If we
3364          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3365          */
3366         if (rdma_used) {
3367                 mp = NULL;
3368                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3369                 uio.uio_iov = &iov;
3370                 uio.uio_iovcnt = 1;
3371         } else {
3372                 /*
3373                  * mp will contain the data to be sent out in the read reply.
3374                  * It will be freed after the reply has been sent.
3375                  */
3376                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3377                 ASSERT(mp != NULL);
3378                 ASSERT(alloc_err == 0);
3379                 uio.uio_iov = iovp;
3380                 uio.uio_iovcnt = iovcnt;
3381         }
3382 
3383         uio.uio_segflg = UIO_SYSSPACE;
3384         uio.uio_extflg = UIO_COPY_CACHED;
3385         uio.uio_loffset = args->offset;
3386         uio.uio_resid = args->count;
3387         uiop = &uio;
3388 
3389 doio_read:
3390         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3391 
3392         va.va_mask = AT_SIZE;
3393         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3394 
3395         if (error) {
3396                 if (mp)
3397                         freemsg(mp);
3398                 *cs->statusp = resp->status = puterrno4(error);
3399                 goto out;
3400         }
3401 
3402         /* make mblk using zc buffers */
3403         if (loaned_buffers) {
3404                 mp = uio_to_mblk(uiop);
3405                 ASSERT(mp != NULL);
3406         }
3407 
3408         *cs->statusp = resp->status = NFS4_OK;
3409 
3410         ASSERT(uiop->uio_resid >= 0);
3411         resp->data_len = args->count - uiop->uio_resid;
3412         if (mp) {
3413                 resp->data_val = (char *)mp->b_datap->db_base;
3414                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3415         } else {
3416                 resp->data_val = (caddr_t)iov.iov_base;
3417         }
3418 
3419         resp->mblk = mp;
3420 
3421         if (!verror && offset + resp->data_len == va.va_size)
3422                 resp->eof = TRUE;
3423         else
3424                 resp->eof = FALSE;
3425 
3426         if (rdma_used) {
3427                 if (!rdma_setup_read_data4(args, resp)) {
3428                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3429                 }
3430         } else {
3431                 resp->wlist = NULL;
3432         }
3433 
3434 out:
3435         if (in_crit)
3436                 nbl_end_crit(vp);
3437 
3438         if (iovp != NULL)
3439                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3440 
3441         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3442             READ4res *, resp);
3443 }
3444 
3445 static void
3446 rfs4_op_read_free(nfs_resop4 *resop)
3447 {
3448         READ4res        *resp = &resop->nfs_resop4_u.opread;
3449 
3450         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3451                 freemsg(resp->mblk);
3452                 resp->mblk = NULL;
3453                 resp->data_val = NULL;
3454                 resp->data_len = 0;
3455         }
3456 }
3457 
3458 static void
3459 rfs4_op_readdir_free(nfs_resop4 * resop)
3460 {
3461         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3462 
3463         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3464                 freeb(resp->mblk);
3465                 resp->mblk = NULL;
3466                 resp->data_len = 0;
3467         }
3468 }
3469 
3470 
3471 /* ARGSUSED */
3472 static void
3473 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3474     struct compound_state *cs)
3475 {
3476         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3477         int             error;
3478         vnode_t         *vp;
3479         struct exportinfo *exi, *sav_exi;
3480         nfs_fh4_fmt_t   *fh_fmtp;
3481         nfs_export_t *ne = nfs_get_export();
3482 
3483         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3484 
3485         if (cs->vp) {
3486                 VN_RELE(cs->vp);
3487                 cs->vp = NULL;
3488         }
3489 
3490         if (cs->cr)
3491                 crfree(cs->cr);
3492 
3493         cs->cr = crdup(cs->basecr);
3494 
3495         vp = ne->exi_public->exi_vp;
3496         if (vp == NULL) {
3497                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3498                 goto out;
3499         }
3500 
3501         error = makefh4(&cs->fh, vp, ne->exi_public);
3502         if (error != 0) {
3503                 *cs->statusp = resp->status = puterrno4(error);
3504                 goto out;
3505         }
3506         sav_exi = cs->exi;
3507         if (ne->exi_public == ne->exi_root) {
3508                 /*
3509                  * No filesystem is actually shared public, so we default
3510                  * to exi_root. In this case, we must check whether root
3511                  * is exported.
3512                  */
3513                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3514 
3515                 /*
3516                  * if root filesystem is exported, the exportinfo struct that we
3517                  * should use is what checkexport4 returns, because root_exi is
3518                  * actually a mostly empty struct.
3519                  */
3520                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3521                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3522                 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3523         } else {
3524                 /*
3525                  * it's a properly shared filesystem
3526                  */
3527                 cs->exi = ne->exi_public;
3528         }
3529 
3530         if (is_system_labeled()) {
3531                 bslabel_t *clabel;
3532 
3533                 ASSERT(req->rq_label != NULL);
3534                 clabel = req->rq_label;
3535                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3536                     "got client label from request(1)",
3537                     struct svc_req *, req);
3538                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3539                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3540                             cs->exi)) {
3541                                 *cs->statusp = resp->status =
3542                                     NFS4ERR_SERVERFAULT;
3543                                 goto out;
3544                         }
3545                 }
3546         }
3547 
3548         VN_HOLD(vp);
3549         cs->vp = vp;
3550 
3551         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3552                 VN_RELE(cs->vp);
3553                 cs->vp = NULL;
3554                 cs->exi = sav_exi;
3555                 goto out;
3556         }
3557 
3558         *cs->statusp = resp->status = NFS4_OK;
3559 out:
3560         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3561             PUTPUBFH4res *, resp);
3562 }
3563 
3564 /*
3565  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3566  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3567  * or joe have restrictive search permissions, then we shouldn't let
3568  * the client get a file handle. This is easy to enforce. However, we
3569  * don't know what security flavor should be used until we resolve the
3570  * path name. Another complication is uid mapping. If root is
3571  * the user, then it will be mapped to the anonymous user by default,
3572  * but we won't know that till we've resolved the path name. And we won't
3573  * know what the anonymous user is.
3574  * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that flavor of
3576  * the target object matches that of the request, and if root was the
3577  * caller, check for the root= and anon= options, and if necessary,
3578  * repeat the lookup using the right cred_t. But that's not done yet.
3579  */
3580 /* ARGSUSED */
3581 static void
3582 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3583     struct compound_state *cs)
3584 {
3585         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3586         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3587         nfs_fh4_fmt_t *fh_fmtp;
3588 
3589         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3590             PUTFH4args *, args);
3591 
3592         if (cs->vp) {
3593                 VN_RELE(cs->vp);
3594                 cs->vp = NULL;
3595         }
3596 
3597         if (cs->cr) {
3598                 crfree(cs->cr);
3599                 cs->cr = NULL;
3600         }
3601 
3602 
3603         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3604                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3605                 goto out;
3606         }
3607 
3608         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3609         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3610             NULL);
3611 
3612         if (cs->exi == NULL) {
3613                 *cs->statusp = resp->status = NFS4ERR_STALE;
3614                 goto out;
3615         }
3616 
3617         cs->cr = crdup(cs->basecr);
3618 
3619         ASSERT(cs->cr != NULL);
3620 
3621         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3622                 *cs->statusp = resp->status;
3623                 goto out;
3624         }
3625 
3626         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3627                 VN_RELE(cs->vp);
3628                 cs->vp = NULL;
3629                 goto out;
3630         }
3631 
3632         nfs_fh4_copy(&args->object, &cs->fh);
3633         *cs->statusp = resp->status = NFS4_OK;
3634         cs->deleg = FALSE;
3635 
3636 out:
3637         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3638             PUTFH4res *, resp);
3639 }
3640 
3641 /* ARGSUSED */
3642 static void
3643 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3644     struct compound_state *cs)
3645 {
3646         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3647         int error;
3648         fid_t fid;
3649         struct exportinfo *exi, *sav_exi;
3650 
3651         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3652 
3653         if (cs->vp) {
3654                 VN_RELE(cs->vp);
3655                 cs->vp = NULL;
3656         }
3657 
3658         if (cs->cr)
3659                 crfree(cs->cr);
3660 
3661         cs->cr = crdup(cs->basecr);
3662 
3663         /*
3664          * Using rootdir, the system root vnode,
3665          * get its fid.
3666          */
3667         bzero(&fid, sizeof (fid));
3668         fid.fid_len = MAXFIDSZ;
3669         error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3670         if (error != 0) {
3671                 *cs->statusp = resp->status = puterrno4(error);
3672                 goto out;
3673         }
3674 
3675         /*
3676          * Then use the root fsid & fid it to find out if it's exported
3677          *
3678          * If the server root isn't exported directly, then
3679          * it should at least be a pseudo export based on
3680          * one or more exports further down in the server's
3681          * file tree.
3682          */
3683         exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3684         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3685                 NFS4_DEBUG(rfs4_debug,
3686                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3687                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3688                 goto out;
3689         }
3690 
3691         /*
3692          * Now make a filehandle based on the root
3693          * export and root vnode.
3694          */
3695         error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3696         if (error != 0) {
3697                 *cs->statusp = resp->status = puterrno4(error);
3698                 goto out;
3699         }
3700 
3701         sav_exi = cs->exi;
3702         cs->exi = exi;
3703 
3704         VN_HOLD(ZONE_ROOTVP());
3705         cs->vp = ZONE_ROOTVP();
3706 
3707         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3708                 VN_RELE(cs->vp);
3709                 cs->vp = NULL;
3710                 cs->exi = sav_exi;
3711                 goto out;
3712         }
3713 
3714         *cs->statusp = resp->status = NFS4_OK;
3715         cs->deleg = FALSE;
3716 out:
3717         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3718             PUTROOTFH4res *, resp);
3719 }
3720 
3721 /*
3722  * readlink: args: CURRENT_FH.
3723  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3724  */
3725 
3726 /* ARGSUSED */
3727 static void
3728 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3729     struct compound_state *cs)
3730 {
3731         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3732         int error;
3733         vnode_t *vp;
3734         struct iovec iov;
3735         struct vattr va;
3736         struct uio uio;
3737         char *data;
3738         struct sockaddr *ca;
3739         char *name = NULL;
3740         int is_referral;
3741 
3742         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3743 
3744         /* CURRENT_FH: directory */
3745         vp = cs->vp;
3746         if (vp == NULL) {
3747                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3748                 goto out;
3749         }
3750 
3751         if (cs->access == CS_ACCESS_DENIED) {
3752                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3753                 goto out;
3754         }
3755 
3756         /* Is it a referral? */
3757         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3758 
3759                 is_referral = 1;
3760 
3761         } else {
3762 
3763                 is_referral = 0;
3764 
3765                 if (vp->v_type == VDIR) {
3766                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3767                         goto out;
3768                 }
3769 
3770                 if (vp->v_type != VLNK) {
3771                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3772                         goto out;
3773                 }
3774 
3775         }
3776 
3777         va.va_mask = AT_MODE;
3778         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3779         if (error) {
3780                 *cs->statusp = resp->status = puterrno4(error);
3781                 goto out;
3782         }
3783 
3784         if (MANDLOCK(vp, va.va_mode)) {
3785                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3786                 goto out;
3787         }
3788 
3789         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3790 
3791         if (is_referral) {
3792                 char *s;
3793                 size_t strsz;
3794 
3795                 /* Get an artificial symlink based on a referral */
3796                 s = build_symlink(vp, cs->cr, &strsz);
3797                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3798                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3799                     vnode_t *, vp, char *, s);
3800                 if (s == NULL)
3801                         error = EINVAL;
3802                 else {
3803                         error = 0;
3804                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3805                         kmem_free(s, strsz);
3806                 }
3807 
3808         } else {
3809 
3810                 iov.iov_base = data;
3811                 iov.iov_len = MAXPATHLEN;
3812                 uio.uio_iov = &iov;
3813                 uio.uio_iovcnt = 1;
3814                 uio.uio_segflg = UIO_SYSSPACE;
3815                 uio.uio_extflg = UIO_COPY_CACHED;
3816                 uio.uio_loffset = 0;
3817                 uio.uio_resid = MAXPATHLEN;
3818 
3819                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3820 
3821                 if (!error)
3822                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3823         }
3824 
3825         if (error) {
3826                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3827                 *cs->statusp = resp->status = puterrno4(error);
3828                 goto out;
3829         }
3830 
3831         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3832         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3833             MAXPATHLEN  + 1);
3834 
3835         if (name == NULL) {
3836                 /*
3837                  * Even though the conversion failed, we return
3838                  * something. We just don't translate it.
3839                  */
3840                 name = data;
3841         }
3842 
3843         /*
3844          * treat link name as data
3845          */
3846         (void) str_to_utf8(name, (utf8string *)&resp->link);
3847 
3848         if (name != data)
3849                 kmem_free(name, MAXPATHLEN + 1);
3850         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3851         *cs->statusp = resp->status = NFS4_OK;
3852 
3853 out:
3854         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3855             READLINK4res *, resp);
3856 }
3857 
3858 static void
3859 rfs4_op_readlink_free(nfs_resop4 *resop)
3860 {
3861         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3862         utf8string *symlink = (utf8string *)&resp->link;
3863 
3864         if (symlink->utf8string_val) {
3865                 UTF8STRING_FREE(*symlink)
3866         }
3867 }
3868 
3869 /*
3870  * release_lockowner:
3871  *      Release any state associated with the supplied
3872  *      lockowner. Note if any lo_state is holding locks we will not
3873  *      rele that lo_state and thus the lockowner will not be destroyed.
3874  *      A client using lock after the lock owner stateid has been released
3875  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3876  *      to reissue the lock with new_lock_owner set to TRUE.
3877  *      args: lock_owner
3878  *      res:  status
3879  */
3880 /* ARGSUSED */
3881 static void
3882 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3883     struct svc_req *req, struct compound_state *cs)
3884 {
3885         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3886         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3887         rfs4_lockowner_t *lo;
3888         rfs4_openowner_t *oo;
3889         rfs4_state_t *sp;
3890         rfs4_lo_state_t *lsp;
3891         rfs4_client_t *cp;
3892         bool_t create = FALSE;
3893         locklist_t *llist;
3894         sysid_t sysid;
3895 
3896         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3897             cs, RELEASE_LOCKOWNER4args *, ap);
3898 
3899         /* Make sure there is a clientid around for this request */
3900         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3901 
3902         if (cp == NULL) {
3903                 *cs->statusp = resp->status =
3904                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3905                 goto out;
3906         }
3907         rfs4_client_rele(cp);
3908 
3909         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3910         if (lo == NULL) {
3911                 *cs->statusp = resp->status = NFS4_OK;
3912                 goto out;
3913         }
3914         ASSERT(lo->rl_client != NULL);
3915 
3916         /*
3917          * Check for EXPIRED client. If so will reap state with in a lease
3918          * period or on next set_clientid_confirm step
3919          */
3920         if (rfs4_lease_expired(lo->rl_client)) {
3921                 rfs4_lockowner_rele(lo);
3922                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3923                 goto out;
3924         }
3925 
3926         /*
3927          * If no sysid has been assigned, then no locks exist; just return.
3928          */
3929         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3930         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3931                 rfs4_lockowner_rele(lo);
3932                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3933                 goto out;
3934         }
3935 
3936         sysid = lo->rl_client->rc_sysidt;
3937         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3938 
3939         /*
3940          * Mark the lockowner invalid.
3941          */
3942         rfs4_dbe_hide(lo->rl_dbe);
3943 
3944         /*
3945          * sysid-pid pair should now not be used since the lockowner is
3946          * invalid. If the client were to instantiate the lockowner again
3947          * it would be assigned a new pid. Thus we can get the list of
3948          * current locks.
3949          */
3950 
3951         llist = flk_get_active_locks(sysid, lo->rl_pid);
3952         /* If we are still holding locks fail */
3953         if (llist != NULL) {
3954 
3955                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3956 
3957                 flk_free_locklist(llist);
3958                 /*
3959                  * We need to unhide the lockowner so the client can
3960                  * try it again. The bad thing here is if the client
3961                  * has a logic error that took it here in the first place
3962                  * they probably have lost accounting of the locks that it
3963                  * is holding. So we may have dangling state until the
3964                  * open owner state is reaped via close. One scenario
3965                  * that could possibly occur is that the client has
3966                  * sent the unlock request(s) in separate threads
3967                  * and has not waited for the replies before sending the
3968                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3969                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3970                  * reissuing the request.
3971                  */
3972                 rfs4_dbe_unhide(lo->rl_dbe);
3973                 rfs4_lockowner_rele(lo);
3974                 goto out;
3975         }
3976 
3977         /*
3978          * For the corresponding client we need to check each open
3979          * owner for any opens that have lockowner state associated
3980          * with this lockowner.
3981          */
3982 
3983         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3984         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3985             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3986 
3987                 rfs4_dbe_lock(oo->ro_dbe);
3988                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3989                     sp = list_next(&oo->ro_statelist, sp)) {
3990 
3991                         rfs4_dbe_lock(sp->rs_dbe);
3992                         for (lsp = list_head(&sp->rs_lostatelist);
3993                             lsp != NULL;
3994                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
3995                                 if (lsp->rls_locker == lo) {
3996                                         rfs4_dbe_lock(lsp->rls_dbe);
3997                                         rfs4_dbe_invalidate(lsp->rls_dbe);
3998                                         rfs4_dbe_unlock(lsp->rls_dbe);
3999                                 }
4000                         }
4001                         rfs4_dbe_unlock(sp->rs_dbe);
4002                 }
4003                 rfs4_dbe_unlock(oo->ro_dbe);
4004         }
4005         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4006 
4007         rfs4_lockowner_rele(lo);
4008 
4009         *cs->statusp = resp->status = NFS4_OK;
4010 
4011 out:
4012         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4013             cs, RELEASE_LOCKOWNER4res *, resp);
4014 }
4015 
4016 /*
4017  * short utility function to lookup a file and recall the delegation
4018  */
4019 static rfs4_file_t *
4020 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4021     int *lkup_error, cred_t *cr)
4022 {
4023         vnode_t *vp;
4024         rfs4_file_t *fp = NULL;
4025         bool_t fcreate = FALSE;
4026         int error;
4027 
4028         if (vpp)
4029                 *vpp = NULL;
4030 
4031         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4032             NULL)) == 0) {
4033                 if (vp->v_type == VREG)
4034                         fp = rfs4_findfile(vp, NULL, &fcreate);
4035                 if (vpp)
4036                         *vpp = vp;
4037                 else
4038                         VN_RELE(vp);
4039         }
4040 
4041         if (lkup_error)
4042                 *lkup_error = error;
4043 
4044         return (fp);
4045 }
4046 
4047 /*
4048  * remove: args: CURRENT_FH: directory; name.
4049  *      res: status. If success - CURRENT_FH unchanged, return change_info
4050  *              for directory.
4051  */
4052 /* ARGSUSED */
4053 static void
4054 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4055     struct compound_state *cs)
4056 {
4057         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4058         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4059         int error;
4060         vnode_t *dvp, *vp;
4061         struct vattr bdva, idva, adva;
4062         char *nm;
4063         uint_t len;
4064         rfs4_file_t *fp;
4065         int in_crit = 0;
4066         bslabel_t *clabel;
4067         struct sockaddr *ca;
4068         char *name = NULL;
4069         nfsstat4 status;
4070 
4071         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4072             REMOVE4args *, args);
4073 
4074         /* CURRENT_FH: directory */
4075         dvp = cs->vp;
4076         if (dvp == NULL) {
4077                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4078                 goto out;
4079         }
4080 
4081         if (cs->access == CS_ACCESS_DENIED) {
4082                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4083                 goto out;
4084         }
4085 
4086         /*
4087          * If there is an unshared filesystem mounted on this vnode,
4088          * Do not allow to remove anything in this directory.
4089          */
4090         if (vn_ismntpt(dvp)) {
4091                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4092                 goto out;
4093         }
4094 
4095         if (dvp->v_type != VDIR) {
4096                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4097                 goto out;
4098         }
4099 
4100         status = utf8_dir_verify(&args->target);
4101         if (status != NFS4_OK) {
4102                 *cs->statusp = resp->status = status;
4103                 goto out;
4104         }
4105 
4106         /*
4107          * Lookup the file so that we can check if it's a directory
4108          */
4109         nm = utf8_to_fn(&args->target, &len, NULL);
4110         if (nm == NULL) {
4111                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4112                 goto out;
4113         }
4114 
4115         if (len > MAXNAMELEN) {
4116                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4117                 kmem_free(nm, len);
4118                 goto out;
4119         }
4120 
4121         if (rdonly4(req, cs)) {
4122                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4123                 kmem_free(nm, len);
4124                 goto out;
4125         }
4126 
4127         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4128         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4129             MAXPATHLEN  + 1);
4130 
4131         if (name == NULL) {
4132                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4133                 kmem_free(nm, len);
4134                 goto out;
4135         }
4136 
4137         /*
4138          * Lookup the file to determine type and while we are see if
4139          * there is a file struct around and check for delegation.
4140          * We don't need to acquire va_seq before this lookup, if
4141          * it causes an update, cinfo.before will not match, which will
4142          * trigger a cache flush even if atomic is TRUE.
4143          */
4144         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4145                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4146                     NULL)) {
4147                         VN_RELE(vp);
4148                         rfs4_file_rele(fp);
4149                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4150                         if (nm != name)
4151                                 kmem_free(name, MAXPATHLEN + 1);
4152                         kmem_free(nm, len);
4153                         goto out;
4154                 }
4155         }
4156 
4157         /* Didn't find anything to remove */
4158         if (vp == NULL) {
4159                 *cs->statusp = resp->status = error;
4160                 if (nm != name)
4161                         kmem_free(name, MAXPATHLEN + 1);
4162                 kmem_free(nm, len);
4163                 goto out;
4164         }
4165 
4166         if (nbl_need_check(vp)) {
4167                 nbl_start_crit(vp, RW_READER);
4168                 in_crit = 1;
4169                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4170                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4171                         if (nm != name)
4172                                 kmem_free(name, MAXPATHLEN + 1);
4173                         kmem_free(nm, len);
4174                         nbl_end_crit(vp);
4175                         VN_RELE(vp);
4176                         if (fp) {
4177                                 rfs4_clear_dont_grant(fp);
4178                                 rfs4_file_rele(fp);
4179                         }
4180                         goto out;
4181                 }
4182         }
4183 
4184         /* check label before allowing removal */
4185         if (is_system_labeled()) {
4186                 ASSERT(req->rq_label != NULL);
4187                 clabel = req->rq_label;
4188                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4189                     "got client label from request(1)",
4190                     struct svc_req *, req);
4191                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4192                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4193                             cs->exi)) {
4194                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4195                                 if (name != nm)
4196                                         kmem_free(name, MAXPATHLEN + 1);
4197                                 kmem_free(nm, len);
4198                                 if (in_crit)
4199                                         nbl_end_crit(vp);
4200                                 VN_RELE(vp);
4201                                 if (fp) {
4202                                         rfs4_clear_dont_grant(fp);
4203                                         rfs4_file_rele(fp);
4204                                 }
4205                                 goto out;
4206                         }
4207                 }
4208         }
4209 
4210         /* Get dir "before" change value */
4211         bdva.va_mask = AT_CTIME|AT_SEQ;
4212         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4213         if (error) {
4214                 *cs->statusp = resp->status = puterrno4(error);
4215                 if (nm != name)
4216                         kmem_free(name, MAXPATHLEN + 1);
4217                 kmem_free(nm, len);
4218                 if (in_crit)
4219                         nbl_end_crit(vp);
4220                 VN_RELE(vp);
4221                 if (fp) {
4222                         rfs4_clear_dont_grant(fp);
4223                         rfs4_file_rele(fp);
4224                 }
4225                 goto out;
4226         }
4227         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4228 
4229         /* Actually do the REMOVE operation */
4230         if (vp->v_type == VDIR) {
4231                 /*
4232                  * Can't remove a directory that has a mounted-on filesystem.
4233                  */
4234                 if (vn_ismntpt(vp)) {
4235                         error = EACCES;
4236                 } else {
4237                         /*
4238                          * System V defines rmdir to return EEXIST,
4239                          * not ENOTEMPTY, if the directory is not
4240                          * empty.  A System V NFS server needs to map
4241                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4242                          * transmit over the wire.
4243                          */
4244                         if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4245                             NULL, 0)) == EEXIST)
4246                                 error = ENOTEMPTY;
4247                 }
4248         } else {
4249                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4250                     fp != NULL) {
4251                         struct vattr va;
4252                         vnode_t *tvp;
4253 
4254                         rfs4_dbe_lock(fp->rf_dbe);
4255                         tvp = fp->rf_vp;
4256                         if (tvp)
4257                                 VN_HOLD(tvp);
4258                         rfs4_dbe_unlock(fp->rf_dbe);
4259 
4260                         if (tvp) {
4261                                 /*
4262                                  * This is va_seq safe because we are not
4263                                  * manipulating dvp.
4264                                  */
4265                                 va.va_mask = AT_NLINK;
4266                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4267                                     va.va_nlink == 0) {
4268                                         /* Remove state on file remove */
4269                                         if (in_crit) {
4270                                                 nbl_end_crit(vp);
4271                                                 in_crit = 0;
4272                                         }
4273                                         rfs4_close_all_state(fp);
4274                                 }
4275                                 VN_RELE(tvp);
4276                         }
4277                 }
4278         }
4279 
4280         if (in_crit)
4281                 nbl_end_crit(vp);
4282         VN_RELE(vp);
4283 
4284         if (fp) {
4285                 rfs4_clear_dont_grant(fp);
4286                 rfs4_file_rele(fp);
4287         }
4288         if (nm != name)
4289                 kmem_free(name, MAXPATHLEN + 1);
4290         kmem_free(nm, len);
4291 
4292         if (error) {
4293                 *cs->statusp = resp->status = puterrno4(error);
4294                 goto out;
4295         }
4296 
4297         /*
4298          * Get the initial "after" sequence number, if it fails, set to zero
4299          */
4300         idva.va_mask = AT_SEQ;
4301         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4302                 idva.va_seq = 0;
4303 
4304         /*
4305          * Force modified data and metadata out to stable storage.
4306          */
4307         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4308 
4309         /*
4310          * Get "after" change value, if it fails, simply return the
4311          * before value.
4312          */
4313         adva.va_mask = AT_CTIME|AT_SEQ;
4314         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4315                 adva.va_ctime = bdva.va_ctime;
4316                 adva.va_seq = 0;
4317         }
4318 
4319         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4320 
4321         /*
4322          * The cinfo.atomic = TRUE only if we have
4323          * non-zero va_seq's, and it has incremented by exactly one
4324          * during the VOP_REMOVE/RMDIR and it didn't change during
4325          * the VOP_FSYNC.
4326          */
4327         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4328             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4329                 resp->cinfo.atomic = TRUE;
4330         else
4331                 resp->cinfo.atomic = FALSE;
4332 
4333         *cs->statusp = resp->status = NFS4_OK;
4334 
4335 out:
4336         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4337             REMOVE4res *, resp);
4338 }
4339 
4340 /*
4341  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4342  *              oldname and newname.
4343  *      res: status. If success - CURRENT_FH unchanged, return change_info
4344  *              for both from and target directories.
4345  */
4346 /* ARGSUSED */
4347 static void
4348 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4349     struct compound_state *cs)
4350 {
4351         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4352         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4353         int error;
4354         vnode_t *odvp;
4355         vnode_t *ndvp;
4356         vnode_t *srcvp, *targvp, *tvp;
4357         struct vattr obdva, oidva, oadva;
4358         struct vattr nbdva, nidva, nadva;
4359         char *onm, *nnm;
4360         uint_t olen, nlen;
4361         rfs4_file_t *fp, *sfp;
4362         int in_crit_src, in_crit_targ;
4363         int fp_rele_grant_hold, sfp_rele_grant_hold;
4364         int unlinked;
4365         bslabel_t *clabel;
4366         struct sockaddr *ca;
4367         char *converted_onm = NULL;
4368         char *converted_nnm = NULL;
4369         nfsstat4 status;
4370 
4371         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4372             RENAME4args *, args);
4373 
4374         fp = sfp = NULL;
4375         srcvp = targvp = tvp = NULL;
4376         in_crit_src = in_crit_targ = 0;
4377         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4378         unlinked = 0;
4379 
4380         /* CURRENT_FH: target directory */
4381         ndvp = cs->vp;
4382         if (ndvp == NULL) {
4383                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4384                 goto out;
4385         }
4386 
4387         /* SAVED_FH: from directory */
4388         odvp = cs->saved_vp;
4389         if (odvp == NULL) {
4390                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4391                 goto out;
4392         }
4393 
4394         if (cs->access == CS_ACCESS_DENIED) {
4395                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4396                 goto out;
4397         }
4398 
4399         /*
4400          * If there is an unshared filesystem mounted on this vnode,
4401          * do not allow to rename objects in this directory.
4402          */
4403         if (vn_ismntpt(odvp)) {
4404                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4405                 goto out;
4406         }
4407 
4408         /*
4409          * If there is an unshared filesystem mounted on this vnode,
4410          * do not allow to rename to this directory.
4411          */
4412         if (vn_ismntpt(ndvp)) {
4413                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4414                 goto out;
4415         }
4416 
4417         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4418                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4419                 goto out;
4420         }
4421 
4422         if (cs->saved_exi != cs->exi) {
4423                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4424                 goto out;
4425         }
4426 
4427         status = utf8_dir_verify(&args->oldname);
4428         if (status != NFS4_OK) {
4429                 *cs->statusp = resp->status = status;
4430                 goto out;
4431         }
4432 
4433         status = utf8_dir_verify(&args->newname);
4434         if (status != NFS4_OK) {
4435                 *cs->statusp = resp->status = status;
4436                 goto out;
4437         }
4438 
4439         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4440         if (onm == NULL) {
4441                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4442                 goto out;
4443         }
4444         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4445         nlen = MAXPATHLEN + 1;
4446         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4447             nlen);
4448 
4449         if (converted_onm == NULL) {
4450                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4451                 kmem_free(onm, olen);
4452                 goto out;
4453         }
4454 
4455         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4456         if (nnm == NULL) {
4457                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4458                 if (onm != converted_onm)
4459                         kmem_free(converted_onm, MAXPATHLEN + 1);
4460                 kmem_free(onm, olen);
4461                 goto out;
4462         }
4463         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4464             MAXPATHLEN  + 1);
4465 
4466         if (converted_nnm == NULL) {
4467                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4468                 kmem_free(nnm, nlen);
4469                 nnm = NULL;
4470                 if (onm != converted_onm)
4471                         kmem_free(converted_onm, MAXPATHLEN + 1);
4472                 kmem_free(onm, olen);
4473                 goto out;
4474         }
4475 
4476 
4477         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4478                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4479                 kmem_free(onm, olen);
4480                 kmem_free(nnm, nlen);
4481                 goto out;
4482         }
4483 
4484 
4485         if (rdonly4(req, cs)) {
4486                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4487                 if (onm != converted_onm)
4488                         kmem_free(converted_onm, MAXPATHLEN + 1);
4489                 kmem_free(onm, olen);
4490                 if (nnm != converted_nnm)
4491                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4492                 kmem_free(nnm, nlen);
4493                 goto out;
4494         }
4495 
4496         /* check label of the target dir */
4497         if (is_system_labeled()) {
4498                 ASSERT(req->rq_label != NULL);
4499                 clabel = req->rq_label;
4500                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4501                     "got client label from request(1)",
4502                     struct svc_req *, req);
4503                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4504                         if (!do_rfs_label_check(clabel, ndvp,
4505                             EQUALITY_CHECK, cs->exi)) {
4506                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4507                                 goto err_out;
4508                         }
4509                 }
4510         }
4511 
4512         /*
4513          * Is the source a file and have a delegation?
4514          * We don't need to acquire va_seq before these lookups, if
4515          * it causes an update, cinfo.before will not match, which will
4516          * trigger a cache flush even if atomic is TRUE.
4517          */
4518         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4519             &error, cs->cr)) {
4520                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4521                     NULL)) {
4522                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4523                         goto err_out;
4524                 }
4525         }
4526 
4527         if (srcvp == NULL) {
4528                 *cs->statusp = resp->status = puterrno4(error);
4529                 if (onm != converted_onm)
4530                         kmem_free(converted_onm, MAXPATHLEN + 1);
4531                 kmem_free(onm, olen);
4532                 if (nnm != converted_nnm)
4533                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4534                 kmem_free(nnm, nlen);
4535                 goto out;
4536         }
4537 
4538         sfp_rele_grant_hold = 1;
4539 
4540         /* Does the destination exist and a file and have a delegation? */
4541         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4542             NULL, cs->cr)) {
4543                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4544                     NULL)) {
4545                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4546                         goto err_out;
4547                 }
4548         }
4549         fp_rele_grant_hold = 1;
4550 
4551         /* Check for NBMAND lock on both source and target */
4552         if (nbl_need_check(srcvp)) {
4553                 nbl_start_crit(srcvp, RW_READER);
4554                 in_crit_src = 1;
4555                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4556                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4557                         goto err_out;
4558                 }
4559         }
4560 
4561         if (targvp && nbl_need_check(targvp)) {
4562                 nbl_start_crit(targvp, RW_READER);
4563                 in_crit_targ = 1;
4564                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4565                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4566                         goto err_out;
4567                 }
4568         }
4569 
4570         /* Get source "before" change value */
4571         obdva.va_mask = AT_CTIME|AT_SEQ;
4572         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4573         if (!error) {
4574                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4575                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4576         }
4577         if (error) {
4578                 *cs->statusp = resp->status = puterrno4(error);
4579                 goto err_out;
4580         }
4581 
4582         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4583         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4584 
4585         error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4586             NULL, 0);
4587 
4588         /*
4589          * If target existed and was unlinked by VOP_RENAME, state will need
4590          * closed. To avoid deadlock, rfs4_close_all_state will be done after
4591          * any necessary nbl_end_crit on srcvp and tgtvp.
4592          */
4593         if (error == 0 && fp != NULL) {
4594                 rfs4_dbe_lock(fp->rf_dbe);
4595                 tvp = fp->rf_vp;
4596                 if (tvp)
4597                         VN_HOLD(tvp);
4598                 rfs4_dbe_unlock(fp->rf_dbe);
4599 
4600                 if (tvp) {
4601                         struct vattr va;
4602                         va.va_mask = AT_NLINK;
4603 
4604                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4605                             va.va_nlink == 0) {
4606                                 unlinked = 1;
4607 
4608                                 /* DEBUG data */
4609                                 if ((srcvp == targvp) || (tvp != targvp)) {
4610                                         cmn_err(CE_WARN, "rfs4_op_rename: "
4611                                             "srcvp %p, targvp: %p, tvp: %p",
4612                                             (void *)srcvp, (void *)targvp,
4613                                             (void *)tvp);
4614                                 }
4615                         } else {
4616                                 VN_RELE(tvp);
4617                         }
4618                 }
4619         }
4620         if (error == 0)
4621                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4622 
4623         if (in_crit_src)
4624                 nbl_end_crit(srcvp);
4625         if (srcvp)
4626                 VN_RELE(srcvp);
4627         if (in_crit_targ)
4628                 nbl_end_crit(targvp);
4629         if (targvp)
4630                 VN_RELE(targvp);
4631 
4632         if (unlinked) {
4633                 ASSERT(fp != NULL);
4634                 ASSERT(tvp != NULL);
4635 
4636                 /* DEBUG data */
4637                 if (RW_READ_HELD(&tvp->v_nbllock)) {
4638                         cmn_err(CE_WARN, "rfs4_op_rename: "
4639                             "RW_READ_HELD(%p)", (void *)tvp);
4640                 }
4641 
4642                 /* The file is gone and so should the state */
4643                 rfs4_close_all_state(fp);
4644                 VN_RELE(tvp);
4645         }
4646 
4647         if (sfp) {
4648                 rfs4_clear_dont_grant(sfp);
4649                 rfs4_file_rele(sfp);
4650         }
4651         if (fp) {
4652                 rfs4_clear_dont_grant(fp);
4653                 rfs4_file_rele(fp);
4654         }
4655 
4656         if (converted_onm != onm)
4657                 kmem_free(converted_onm, MAXPATHLEN + 1);
4658         kmem_free(onm, olen);
4659         if (converted_nnm != nnm)
4660                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4661         kmem_free(nnm, nlen);
4662 
4663         /*
4664          * Get the initial "after" sequence number, if it fails, set to zero
4665          */
4666         oidva.va_mask = AT_SEQ;
4667         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4668                 oidva.va_seq = 0;
4669 
4670         nidva.va_mask = AT_SEQ;
4671         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4672                 nidva.va_seq = 0;
4673 
4674         /*
4675          * Force modified data and metadata out to stable storage.
4676          */
4677         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4678         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4679 
4680         if (error) {
4681                 *cs->statusp = resp->status = puterrno4(error);
4682                 goto out;
4683         }
4684 
4685         /*
4686          * Get "after" change values, if it fails, simply return the
4687          * before value.
4688          */
4689         oadva.va_mask = AT_CTIME|AT_SEQ;
4690         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4691                 oadva.va_ctime = obdva.va_ctime;
4692                 oadva.va_seq = 0;
4693         }
4694 
4695         nadva.va_mask = AT_CTIME|AT_SEQ;
4696         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4697                 nadva.va_ctime = nbdva.va_ctime;
4698                 nadva.va_seq = 0;
4699         }
4700 
4701         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4702         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4703 
4704         /*
4705          * The cinfo.atomic = TRUE only if we have
4706          * non-zero va_seq's, and it has incremented by exactly one
4707          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4708          */
4709         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4710             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4711                 resp->source_cinfo.atomic = TRUE;
4712         else
4713                 resp->source_cinfo.atomic = FALSE;
4714 
4715         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4716             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4717                 resp->target_cinfo.atomic = TRUE;
4718         else
4719                 resp->target_cinfo.atomic = FALSE;
4720 
4721 #ifdef  VOLATILE_FH_TEST
4722         {
4723         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4724 
4725         /*
4726          * Add the renamed file handle to the volatile rename list
4727          */
4728         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4729                 /* file handles may expire on rename */
4730                 vnode_t *vp;
4731 
4732                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4733                 /*
4734                  * Already know that nnm will be a valid string
4735                  */
4736                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4737                     NULL, NULL, NULL);
4738                 kmem_free(nnm, nlen);
4739                 if (!error) {
4740                         add_volrnm_fh(cs->exi, vp);
4741                         VN_RELE(vp);
4742                 }
4743         }
4744         }
4745 #endif  /* VOLATILE_FH_TEST */
4746 
4747         *cs->statusp = resp->status = NFS4_OK;
4748 out:
4749         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4750             RENAME4res *, resp);
4751         return;
4752 
4753 err_out:
4754         if (onm != converted_onm)
4755                 kmem_free(converted_onm, MAXPATHLEN + 1);
4756         if (onm != NULL)
4757                 kmem_free(onm, olen);
4758         if (nnm != converted_nnm)
4759                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4760         if (nnm != NULL)
4761                 kmem_free(nnm, nlen);
4762 
4763         if (in_crit_src) nbl_end_crit(srcvp);
4764         if (in_crit_targ) nbl_end_crit(targvp);
4765         if (targvp) VN_RELE(targvp);
4766         if (srcvp) VN_RELE(srcvp);
4767         if (sfp) {
4768                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4769                 rfs4_file_rele(sfp);
4770         }
4771         if (fp) {
4772                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4773                 rfs4_file_rele(fp);
4774         }
4775 
4776         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4777             RENAME4res *, resp);
4778 }
4779 
4780 /* ARGSUSED */
4781 static void
4782 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4783     struct compound_state *cs)
4784 {
4785         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4786         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4787         rfs4_client_t *cp;
4788 
4789         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4790             RENEW4args *, args);
4791 
4792         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4793                 *cs->statusp = resp->status =
4794                     rfs4_check_clientid(&args->clientid, 0);
4795                 goto out;
4796         }
4797 
4798         if (rfs4_lease_expired(cp)) {
4799                 rfs4_client_rele(cp);
4800                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4801                 goto out;
4802         }
4803 
4804         rfs4_update_lease(cp);
4805 
4806         mutex_enter(cp->rc_cbinfo.cb_lock);
4807         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4808                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4809                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4810         } else {
4811                 *cs->statusp = resp->status = NFS4_OK;
4812         }
4813         mutex_exit(cp->rc_cbinfo.cb_lock);
4814 
4815         rfs4_client_rele(cp);
4816 
4817 out:
4818         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4819             RENEW4res *, resp);
4820 }
4821 
4822 /* ARGSUSED */
4823 static void
4824 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4825     struct compound_state *cs)
4826 {
4827         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4828 
4829         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4830 
4831         /* No need to check cs->access - we are not accessing any object */
4832         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4833                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4834                 goto out;
4835         }
4836         if (cs->vp != NULL) {
4837                 VN_RELE(cs->vp);
4838         }
4839         cs->vp = cs->saved_vp;
4840         cs->saved_vp = NULL;
4841         cs->exi = cs->saved_exi;
4842         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4843         *cs->statusp = resp->status = NFS4_OK;
4844         cs->deleg = FALSE;
4845 
4846 out:
4847         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4848             RESTOREFH4res *, resp);
4849 }
4850 
4851 /* ARGSUSED */
4852 static void
4853 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4854     struct compound_state *cs)
4855 {
4856         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4857 
4858         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4859 
4860         /* No need to check cs->access - we are not accessing any object */
4861         if (cs->vp == NULL) {
4862                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4863                 goto out;
4864         }
4865         if (cs->saved_vp != NULL) {
4866                 VN_RELE(cs->saved_vp);
4867         }
4868         cs->saved_vp = cs->vp;
4869         VN_HOLD(cs->saved_vp);
4870         cs->saved_exi = cs->exi;
4871         /*
4872          * since SAVEFH is fairly rare, don't alloc space for its fh
4873          * unless necessary.
4874          */
4875         if (cs->saved_fh.nfs_fh4_val == NULL) {
4876                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4877         }
4878         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4879         *cs->statusp = resp->status = NFS4_OK;
4880 
4881 out:
4882         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4883             SAVEFH4res *, resp);
4884 }
4885 
4886 /*
4887  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4888  * return the bitmap of attrs that were set successfully. It is also
4889  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4890  * always be called only after rfs4_do_set_attrs().
4891  *
4892  * Verify that the attributes are same as the expected ones. sargp->vap
4893  * and sargp->sbp contain the input attributes as translated from fattr4.
4894  *
4895  * This function verifies only the attrs that correspond to a vattr or
4896  * vfsstat struct. That is because of the extra step needed to get the
4897  * corresponding system structs. Other attributes have already been set or
4898  * verified by do_rfs4_set_attrs.
4899  *
4900  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4901  */
4902 static int
4903 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4904     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4905 {
4906         int error, ret_error = 0;
4907         int i, k;
4908         uint_t sva_mask = sargp->vap->va_mask;
4909         uint_t vbit;
4910         union nfs4_attr_u *na;
4911         uint8_t *amap;
4912         bool_t getsb = ntovp->vfsstat;
4913 
4914         if (sva_mask != 0) {
4915                 /*
4916                  * Okay to overwrite sargp->vap because we verify based
4917                  * on the incoming values.
4918                  */
4919                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4920                     sargp->cs->cr, NULL);
4921                 if (ret_error) {
4922                         if (resp == NULL)
4923                                 return (ret_error);
4924                         /*
4925                          * Must return bitmap of successful attrs
4926                          */
4927                         sva_mask = 0;   /* to prevent checking vap later */
4928                 } else {
4929                         /*
4930                          * Some file systems clobber va_mask. it is probably
4931                          * wrong of them to do so, nonethless we practice
4932                          * defensive coding.
4933                          * See bug id 4276830.
4934                          */
4935                         sargp->vap->va_mask = sva_mask;
4936                 }
4937         }
4938 
4939         if (getsb) {
4940                 /*
4941                  * Now get the superblock and loop on the bitmap, as there is
4942                  * no simple way of translating from superblock to bitmap4.
4943                  */
4944                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4945                 if (ret_error) {
4946                         if (resp == NULL)
4947                                 goto errout;
4948                         getsb = FALSE;
4949                 }
4950         }
4951 
4952         /*
4953          * Now loop and verify each attribute which getattr returned
4954          * whether it's the same as the input.
4955          */
4956         if (resp == NULL && !getsb && (sva_mask == 0))
4957                 goto errout;
4958 
4959         na = ntovp->na;
4960         amap = ntovp->amap;
4961         k = 0;
4962         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4963                 k = *amap;
4964                 ASSERT(nfs4_ntov_map[k].nval == k);
4965                 vbit = nfs4_ntov_map[k].vbit;
4966 
4967                 /*
4968                  * If vattr attribute but VOP_GETATTR failed, or it's
4969                  * superblock attribute but VFS_STATVFS failed, skip
4970                  */
4971                 if (vbit) {
4972                         if ((vbit & sva_mask) == 0)
4973                                 continue;
4974                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4975                         continue;
4976                 }
4977                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4978                 if (resp != NULL) {
4979                         if (error)
4980                                 ret_error = -1; /* not all match */
4981                         else    /* update response bitmap */
4982                                 *resp |= nfs4_ntov_map[k].fbit;
4983                         continue;
4984                 }
4985                 if (error) {
4986                         ret_error = -1; /* not all match */
4987                         break;
4988                 }
4989         }
4990 errout:
4991         return (ret_error);
4992 }
4993 
4994 /*
4995  * Decode the attribute to be set/verified. If the attr requires a sys op
4996  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4997  * call the sv_getit function for it, because the sys op hasn't yet been done.
4998  * Return 0 for success, error code if failed.
4999  *
5000  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5001  */
5002 static int
5003 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5004     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5005 {
5006         int error = 0;
5007         bool_t set_later;
5008 
5009         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5010 
5011         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5012                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5013                 /*
5014                  * don't verify yet if a vattr or sb dependent attr,
5015                  * because we don't have their sys values yet.
5016                  * Will be done later.
5017                  */
5018                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5019                         /*
5020                          * ACLs are a special case, since setting the MODE
5021                          * conflicts with setting the ACL.  We delay setting
5022                          * the ACL until all other attributes have been set.
5023                          * The ACL gets set in do_rfs4_op_setattr().
5024                          */
5025                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5026                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5027                                     sargp, nap);
5028                                 if (error) {
5029                                         xdr_free(nfs4_ntov_map[k].xfunc,
5030                                             (caddr_t)nap);
5031                                 }
5032                         }
5033                 }
5034         } else {
5035 #ifdef  DEBUG
5036                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5037                     "decoding attribute %d\n", k);
5038 #endif
5039                 error = EINVAL;
5040         }
5041         if (!error && resp_bval && !set_later) {
5042                 *resp_bval |= nfs4_ntov_map[k].fbit;
5043         }
5044 
5045         return (error);
5046 }
5047 
5048 /*
5049  * Set vattr based on incoming fattr4 attrs - used by setattr.
5050  * Set response mask. Ignore any values that are not writable vattr attrs.
5051  */
5052 static nfsstat4
5053 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5054     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5055     nfs4_attr_cmd_t cmd)
5056 {
5057         int error = 0;
5058         int i;
5059         char *attrs = fattrp->attrlist4;
5060         uint32_t attrslen = fattrp->attrlist4_len;
5061         XDR xdr;
5062         nfsstat4 status = NFS4_OK;
5063         vnode_t *vp = cs->vp;
5064         union nfs4_attr_u *na;
5065         uint8_t *amap;
5066 
5067 #ifndef lint
5068         /*
5069          * Make sure that maximum attribute number can be expressed as an
5070          * 8 bit quantity.
5071          */
5072         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5073 #endif
5074 
5075         if (vp == NULL) {
5076                 if (resp)
5077                         *resp = 0;
5078                 return (NFS4ERR_NOFILEHANDLE);
5079         }
5080         if (cs->access == CS_ACCESS_DENIED) {
5081                 if (resp)
5082                         *resp = 0;
5083                 return (NFS4ERR_ACCESS);
5084         }
5085 
5086         sargp->op = cmd;
5087         sargp->cs = cs;
5088         sargp->flag = 0;     /* may be set later */
5089         sargp->vap->va_mask = 0;
5090         sargp->rdattr_error = NFS4_OK;
5091         sargp->rdattr_error_req = FALSE;
5092         /* sargp->sbp is set by the caller */
5093 
5094         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5095 
5096         na = ntovp->na;
5097         amap = ntovp->amap;
5098 
5099         /*
5100          * The following loop iterates on the nfs4_ntov_map checking
5101          * if the fbit is set in the requested bitmap.
5102          * If set then we process the arguments using the
5103          * rfs4_fattr4 conversion functions to populate the setattr
5104          * vattr and va_mask. Any settable attrs that are not using vattr
5105          * will be set in this loop.
5106          */
5107         for (i = 0; i < nfs4_ntov_map_size; i++) {
5108                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5109                         continue;
5110                 }
5111                 /*
5112                  * If setattr, must be a writable attr.
5113                  * If verify/nverify, must be a readable attr.
5114                  */
5115                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5116                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5117                         /*
5118                          * Client tries to set/verify an
5119                          * unsupported attribute, tries to set
5120                          * a read only attr or verify a write
5121                          * only one - error!
5122                          */
5123                         break;
5124                 }
5125                 /*
5126                  * Decode the attribute to set/verify
5127                  */
5128                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5129                     &xdr, resp ? resp : NULL, na);
5130                 if (error)
5131                         break;
5132                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5133                 na++;
5134                 (ntovp->attrcnt)++;
5135                 if (nfs4_ntov_map[i].vfsstat)
5136                         ntovp->vfsstat = TRUE;
5137         }
5138 
5139         if (error != 0)
5140                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5141                     puterrno4(error));
5142         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5143         return (status);
5144 }
5145 
5146 static nfsstat4
5147 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5148     stateid4 *stateid)
5149 {
5150         int error = 0;
5151         struct nfs4_svgetit_arg sarg;
5152         bool_t trunc;
5153 
5154         nfsstat4 status = NFS4_OK;
5155         cred_t *cr = cs->cr;
5156         vnode_t *vp = cs->vp;
5157         struct nfs4_ntov_table ntov;
5158         struct statvfs64 sb;
5159         struct vattr bva;
5160         struct flock64 bf;
5161         int in_crit = 0;
5162         uint_t saved_mask = 0;
5163         caller_context_t ct;
5164 
5165         *resp = 0;
5166         sarg.sbp = &sb;
5167         sarg.is_referral = B_FALSE;
5168         nfs4_ntov_table_init(&ntov);
5169         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5170             NFS4ATTR_SETIT);
5171         if (status != NFS4_OK) {
5172                 /*
5173                  * failed set attrs
5174                  */
5175                 goto done;
5176         }
5177         if ((sarg.vap->va_mask == 0) &&
5178             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5179                 /*
5180                  * no further work to be done
5181                  */
5182                 goto done;
5183         }
5184 
5185         /*
5186          * If we got a request to set the ACL and the MODE, only
5187          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5188          * to change any other bits, along with setting an ACL,
5189          * gives NFS4ERR_INVAL.
5190          */
5191         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5192             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5193                 vattr_t va;
5194 
5195                 va.va_mask = AT_MODE;
5196                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5197                 if (error) {
5198                         status = puterrno4(error);
5199                         goto done;
5200                 }
5201                 if ((sarg.vap->va_mode ^ va.va_mode) &
5202                     ~(VSUID | VSGID | VSVTX)) {
5203                         status = NFS4ERR_INVAL;
5204                         goto done;
5205                 }
5206         }
5207 
5208         /* Check stateid only if size has been set */
5209         if (sarg.vap->va_mask & AT_SIZE) {
5210                 trunc = (sarg.vap->va_size == 0);
5211                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5212                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5213                 if (status != NFS4_OK)
5214                         goto done;
5215         } else {
5216                 ct.cc_sysid = 0;
5217                 ct.cc_pid = 0;
5218                 ct.cc_caller_id = nfs4_srv_caller_id;
5219                 ct.cc_flags = CC_DONTBLOCK;
5220         }
5221 
5222         /* XXX start of possible race with delegations */
5223 
5224         /*
5225          * We need to specially handle size changes because it is
5226          * possible for the client to create a file with read-only
5227          * modes, but with the file opened for writing. If the client
5228          * then tries to set the file size, e.g. ftruncate(3C),
5229          * fcntl(F_FREESP), the normal access checking done in
5230          * VOP_SETATTR would prevent the client from doing it even though
5231          * it should be allowed to do so.  To get around this, we do the
5232          * access checking for ourselves and use VOP_SPACE which doesn't
5233          * do the access checking.
5234          * Also the client should not be allowed to change the file
5235          * size if there is a conflicting non-blocking mandatory lock in
5236          * the region of the change.
5237          */
5238         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5239                 u_offset_t offset;
5240                 ssize_t length;
5241 
5242                 /*
5243                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5244                  * before returning, sarg.vap->va_mask is used to
5245                  * generate the setattr reply bitmap.  We also clear
5246                  * AT_SIZE below before calling VOP_SPACE.  For both
5247                  * of these cases, the va_mask needs to be saved here
5248                  * and restored after calling VOP_SETATTR.
5249                  */
5250                 saved_mask = sarg.vap->va_mask;
5251 
5252                 /*
5253                  * Check any possible conflict due to NBMAND locks.
5254                  * Get into critical region before VOP_GETATTR, so the
5255                  * size attribute is valid when checking conflicts.
5256                  */
5257                 if (nbl_need_check(vp)) {
5258                         nbl_start_crit(vp, RW_READER);
5259                         in_crit = 1;
5260                 }
5261 
5262                 bva.va_mask = AT_UID|AT_SIZE;
5263                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5264                         status = puterrno4(error);
5265                         goto done;
5266                 }
5267 
5268                 if (in_crit) {
5269                         if (sarg.vap->va_size < bva.va_size) {
5270                                 offset = sarg.vap->va_size;
5271                                 length = bva.va_size - sarg.vap->va_size;
5272                         } else {
5273                                 offset = bva.va_size;
5274                                 length = sarg.vap->va_size - bva.va_size;
5275                         }
5276                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5277                             &ct)) {
5278                                 status = NFS4ERR_LOCKED;
5279                                 goto done;
5280                         }
5281                 }
5282 
5283                 if (crgetuid(cr) == bva.va_uid) {
5284                         sarg.vap->va_mask &= ~AT_SIZE;
5285                         bf.l_type = F_WRLCK;
5286                         bf.l_whence = 0;
5287                         bf.l_start = (off64_t)sarg.vap->va_size;
5288                         bf.l_len = 0;
5289                         bf.l_sysid = 0;
5290                         bf.l_pid = 0;
5291                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5292                             (offset_t)sarg.vap->va_size, cr, &ct);
5293                 }
5294         }
5295 
5296         if (!error && sarg.vap->va_mask != 0)
5297                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5298 
5299         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5300         if (saved_mask & AT_SIZE)
5301                 sarg.vap->va_mask |= AT_SIZE;
5302 
5303         /*
5304          * If an ACL was being set, it has been delayed until now,
5305          * in order to set the mode (via the VOP_SETATTR() above) first.
5306          */
5307         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5308                 int i;
5309 
5310                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5311                         if (ntov.amap[i] == FATTR4_ACL)
5312                                 break;
5313                 if (i < NFS4_MAXNUM_ATTRS) {
5314                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5315                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5316                         if (error == 0) {
5317                                 *resp |= FATTR4_ACL_MASK;
5318                         } else if (error == ENOTSUP) {
5319                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5320                                 status = NFS4ERR_ATTRNOTSUPP;
5321                                 goto done;
5322                         }
5323                 } else {
5324                         NFS4_DEBUG(rfs4_debug,
5325                             (CE_NOTE, "do_rfs4_op_setattr: "
5326                             "unable to find ACL in fattr4"));
5327                         error = EINVAL;
5328                 }
5329         }
5330 
5331         if (error) {
5332                 /* check if a monitor detected a delegation conflict */
5333                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5334                         status = NFS4ERR_DELAY;
5335                 else
5336                         status = puterrno4(error);
5337 
5338                 /*
5339                  * Set the response bitmap when setattr failed.
5340                  * If VOP_SETATTR partially succeeded, test by doing a
5341                  * VOP_GETATTR on the object and comparing the data
5342                  * to the setattr arguments.
5343                  */
5344                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5345         } else {
5346                 /*
5347                  * Force modified metadata out to stable storage.
5348                  */
5349                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5350                 /*
5351                  * Set response bitmap
5352                  */
5353                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5354         }
5355 
5356 /* Return early and already have a NFSv4 error */
5357 done:
5358         /*
5359          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5360          * conversion sets both readable and writeable NFS4 attrs
5361          * for AT_MTIME and AT_ATIME.  The line below masks out
5362          * unrequested attrs from the setattr result bitmap.  This
5363          * is placed after the done: label to catch the ATTRNOTSUP
5364          * case.
5365          */
5366         *resp &= fattrp->attrmask;
5367 
5368         if (in_crit)
5369                 nbl_end_crit(vp);
5370 
5371         nfs4_ntov_table_free(&ntov, &sarg);
5372 
5373         return (status);
5374 }
5375 
5376 /* ARGSUSED */
5377 static void
5378 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5379     struct compound_state *cs)
5380 {
5381         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5382         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5383         bslabel_t *clabel;
5384 
5385         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5386             SETATTR4args *, args);
5387 
5388         if (cs->vp == NULL) {
5389                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5390                 goto out;
5391         }
5392 
5393         /*
5394          * If there is an unshared filesystem mounted on this vnode,
5395          * do not allow to setattr on this vnode.
5396          */
5397         if (vn_ismntpt(cs->vp)) {
5398                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5399                 goto out;
5400         }
5401 
5402         resp->attrsset = 0;
5403 
5404         if (rdonly4(req, cs)) {
5405                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5406                 goto out;
5407         }
5408 
5409         /* check label before setting attributes */
5410         if (is_system_labeled()) {
5411                 ASSERT(req->rq_label != NULL);
5412                 clabel = req->rq_label;
5413                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5414                     "got client label from request(1)",
5415                     struct svc_req *, req);
5416                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5417                         if (!do_rfs_label_check(clabel, cs->vp,
5418                             EQUALITY_CHECK, cs->exi)) {
5419                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5420                                 goto out;
5421                         }
5422                 }
5423         }
5424 
5425         *cs->statusp = resp->status =
5426             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5427             &args->stateid);
5428 
5429 out:
5430         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5431             SETATTR4res *, resp);
5432 }
5433 
5434 /* ARGSUSED */
5435 static void
5436 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5437     struct compound_state *cs)
5438 {
5439         /*
5440          * verify and nverify are exactly the same, except that nverify
5441          * succeeds when some argument changed, and verify succeeds when
5442          * when none changed.
5443          */
5444 
5445         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5446         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5447 
5448         int error;
5449         struct nfs4_svgetit_arg sarg;
5450         struct statvfs64 sb;
5451         struct nfs4_ntov_table ntov;
5452 
5453         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5454             VERIFY4args *, args);
5455 
5456         if (cs->vp == NULL) {
5457                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5458                 goto out;
5459         }
5460 
5461         sarg.sbp = &sb;
5462         sarg.is_referral = B_FALSE;
5463         nfs4_ntov_table_init(&ntov);
5464         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5465             &sarg, &ntov, NFS4ATTR_VERIT);
5466         if (resp->status != NFS4_OK) {
5467                 /*
5468                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5469                  * so could return -1 for "no match".
5470                  */
5471                 if (resp->status == -1)
5472                         resp->status = NFS4ERR_NOT_SAME;
5473                 goto done;
5474         }
5475         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5476         switch (error) {
5477         case 0:
5478                 resp->status = NFS4_OK;
5479                 break;
5480         case -1:
5481                 resp->status = NFS4ERR_NOT_SAME;
5482                 break;
5483         default:
5484                 resp->status = puterrno4(error);
5485                 break;
5486         }
5487 done:
5488         *cs->statusp = resp->status;
5489         nfs4_ntov_table_free(&ntov, &sarg);
5490 out:
5491         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5492             VERIFY4res *, resp);
5493 }
5494 
5495 /* ARGSUSED */
5496 static void
5497 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5498     struct compound_state *cs)
5499 {
5500         /*
5501          * verify and nverify are exactly the same, except that nverify
5502          * succeeds when some argument changed, and verify succeeds when
5503          * when none changed.
5504          */
5505 
5506         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5507         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5508 
5509         int error;
5510         struct nfs4_svgetit_arg sarg;
5511         struct statvfs64 sb;
5512         struct nfs4_ntov_table ntov;
5513 
5514         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5515             NVERIFY4args *, args);
5516 
5517         if (cs->vp == NULL) {
5518                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5519                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5520                     NVERIFY4res *, resp);
5521                 return;
5522         }
5523         sarg.sbp = &sb;
5524         sarg.is_referral = B_FALSE;
5525         nfs4_ntov_table_init(&ntov);
5526         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5527             &sarg, &ntov, NFS4ATTR_VERIT);
5528         if (resp->status != NFS4_OK) {
5529                 /*
5530                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5531                  * so could return -1 for "no match".
5532                  */
5533                 if (resp->status == -1)
5534                         resp->status = NFS4_OK;
5535                 goto done;
5536         }
5537         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5538         switch (error) {
5539         case 0:
5540                 resp->status = NFS4ERR_SAME;
5541                 break;
5542         case -1:
5543                 resp->status = NFS4_OK;
5544                 break;
5545         default:
5546                 resp->status = puterrno4(error);
5547                 break;
5548         }
5549 done:
5550         *cs->statusp = resp->status;
5551         nfs4_ntov_table_free(&ntov, &sarg);
5552 
5553         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5554             NVERIFY4res *, resp);
5555 }
5556 
5557 /*
5558  * XXX - This should live in an NFS header file.
5559  */
5560 #define MAX_IOVECS      12
5561 
5562 /* ARGSUSED */
5563 static void
5564 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5565     struct compound_state *cs)
5566 {
5567         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5568         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5569         int error;
5570         vnode_t *vp;
5571         struct vattr bva;
5572         u_offset_t rlimit;
5573         struct uio uio;
5574         struct iovec iov[MAX_IOVECS];
5575         struct iovec *iovp;
5576         int iovcnt;
5577         int ioflag;
5578         cred_t *savecred, *cr;
5579         bool_t *deleg = &cs->deleg;
5580         nfsstat4 stat;
5581         int in_crit = 0;
5582         caller_context_t ct;
5583         nfs4_srv_t *nsrv4;
5584 
5585         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5586             WRITE4args *, args);
5587 
5588         vp = cs->vp;
5589         if (vp == NULL) {
5590                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5591                 goto out;
5592         }
5593         if (cs->access == CS_ACCESS_DENIED) {
5594                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5595                 goto out;
5596         }
5597 
5598         cr = cs->cr;
5599 
5600         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5601             deleg, TRUE, &ct)) != NFS4_OK) {
5602                 *cs->statusp = resp->status = stat;
5603                 goto out;
5604         }
5605 
5606         /*
5607          * We have to enter the critical region before calling VOP_RWLOCK
5608          * to avoid a deadlock with ufs.
5609          */
5610         if (nbl_need_check(vp)) {
5611                 nbl_start_crit(vp, RW_READER);
5612                 in_crit = 1;
5613                 if (nbl_conflict(vp, NBL_WRITE,
5614                     args->offset, args->data_len, 0, &ct)) {
5615                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5616                         goto out;
5617                 }
5618         }
5619 
5620         bva.va_mask = AT_MODE | AT_UID;
5621         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5622 
5623         /*
5624          * If we can't get the attributes, then we can't do the
5625          * right access checking.  So, we'll fail the request.
5626          */
5627         if (error) {
5628                 *cs->statusp = resp->status = puterrno4(error);
5629                 goto out;
5630         }
5631 
5632         if (rdonly4(req, cs)) {
5633                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5634                 goto out;
5635         }
5636 
5637         if (vp->v_type != VREG) {
5638                 *cs->statusp = resp->status =
5639                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5640                 goto out;
5641         }
5642 
5643         if (crgetuid(cr) != bva.va_uid &&
5644             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5645                 *cs->statusp = resp->status = puterrno4(error);
5646                 goto out;
5647         }
5648 
5649         if (MANDLOCK(vp, bva.va_mode)) {
5650                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5651                 goto out;
5652         }
5653 
5654         nsrv4 = nfs4_get_srv();
5655         if (args->data_len == 0) {
5656                 *cs->statusp = resp->status = NFS4_OK;
5657                 resp->count = 0;
5658                 resp->committed = args->stable;
5659                 resp->writeverf = nsrv4->write4verf;
5660                 goto out;
5661         }
5662 
5663         if (args->mblk != NULL) {
5664                 mblk_t *m;
5665                 uint_t bytes, round_len;
5666 
5667                 iovcnt = 0;
5668                 bytes = 0;
5669                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5670                 for (m = args->mblk;
5671                     m != NULL && bytes < round_len;
5672                     m = m->b_cont) {
5673                         iovcnt++;
5674                         bytes += MBLKL(m);
5675                 }
5676 #ifdef DEBUG
5677                 /* should have ended on an mblk boundary */
5678                 if (bytes != round_len) {
5679                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5680                             bytes, round_len, args->data_len);
5681                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5682                             (void *)args->mblk, (void *)m);
5683                         ASSERT(bytes == round_len);
5684                 }
5685 #endif
5686                 if (iovcnt <= MAX_IOVECS) {
5687                         iovp = iov;
5688                 } else {
5689                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5690                 }
5691                 mblk_to_iov(args->mblk, iovcnt, iovp);
5692         } else if (args->rlist != NULL) {
5693                 iovcnt = 1;
5694                 iovp = iov;
5695                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5696                 iovp->iov_len = args->data_len;
5697         } else {
5698                 iovcnt = 1;
5699                 iovp = iov;
5700                 iovp->iov_base = args->data_val;
5701                 iovp->iov_len = args->data_len;
5702         }
5703 
5704         uio.uio_iov = iovp;
5705         uio.uio_iovcnt = iovcnt;
5706 
5707         uio.uio_segflg = UIO_SYSSPACE;
5708         uio.uio_extflg = UIO_COPY_DEFAULT;
5709         uio.uio_loffset = args->offset;
5710         uio.uio_resid = args->data_len;
5711         uio.uio_llimit = curproc->p_fsz_ctl;
5712         rlimit = uio.uio_llimit - args->offset;
5713         if (rlimit < (u_offset_t)uio.uio_resid)
5714                 uio.uio_resid = (int)rlimit;
5715 
5716         if (args->stable == UNSTABLE4)
5717                 ioflag = 0;
5718         else if (args->stable == FILE_SYNC4)
5719                 ioflag = FSYNC;
5720         else if (args->stable == DATA_SYNC4)
5721                 ioflag = FDSYNC;
5722         else {
5723                 if (iovp != iov)
5724                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5725                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5726                 goto out;
5727         }
5728 
5729         /*
5730          * We're changing creds because VM may fault and we need
5731          * the cred of the current thread to be used if quota
5732          * checking is enabled.
5733          */
5734         savecred = curthread->t_cred;
5735         curthread->t_cred = cr;
5736         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5737         curthread->t_cred = savecred;
5738 
5739         if (iovp != iov)
5740                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5741 
5742         if (error) {
5743                 *cs->statusp = resp->status = puterrno4(error);
5744                 goto out;
5745         }
5746 
5747         *cs->statusp = resp->status = NFS4_OK;
5748         resp->count = args->data_len - uio.uio_resid;
5749 
5750         if (ioflag == 0)
5751                 resp->committed = UNSTABLE4;
5752         else
5753                 resp->committed = FILE_SYNC4;
5754 
5755         resp->writeverf = nsrv4->write4verf;
5756 
5757 out:
5758         if (in_crit)
5759                 nbl_end_crit(vp);
5760 
5761         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5762             WRITE4res *, resp);
5763 }
5764 
5765 
/*
 * XXX put in a header file
 *
 * Local prototype for the RPC credential extraction routine.
 * rfs4_compound() treats a return value of 0 from this function as a
 * credential failure.
 */
extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5768 
/*
 * Execute an NFSv4 COMPOUND request.
 *
 * Each operation in args->array is dispatched, in order, through
 * rfsv4disptab[] and its result is stored in the matching slot of
 * resp->array.  Processing stops after the first operation that leaves
 * a status other than NFS4_OK in *cs.statusp (or after an opcode that is
 * outside the dispatch table); resp->array is then shrunk so that it
 * covers only the operations that were attempted.
 *
 *      args    decoded COMPOUND arguments
 *      resp    reply to fill in; the tag copy and the result array
 *              allocated here are not freed by this function (see
 *              rfs4_compound_free())
 *      exi     asserted to be NULL
 *      req     RPC request; req->rq_label, if set, is freed before the
 *              normal return
 *      cr      asserted to be NULL; a fresh cred is obtained via crget()
 *      rv      optional; set to 0 on entry and to 1 only when
 *              sec_svc_getcred() rejects the request
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
        uint_t i;
        struct compound_state cs;
        nfs4_srv_t *nsrv4;
        nfs_export_t *ne = nfs_get_export();

        if (rv != NULL)
                *rv = 0;
        rfs4_init_compound_state(&cs);
        /*
         * Form a reply tag by copying over the request tag.
         */
        resp->tag.utf8string_val =
            kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
        resp->tag.utf8string_len = args->tag.utf8string_len;
        bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
            resp->tag.utf8string_len);

        /* Every op reports its status through the reply's status word. */
        cs.statusp = &resp->status;
        cs.req = req;
        resp->array = NULL;
        resp->array_len = 0;

        /*
         * XXX for now, minorversion should be zero
         */
        if (args->minorversion != NFS4_MINORVERSION) {
                /* Fire both probes so the rejected compound is traceable. */
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                return;
        }

        /*
         * An empty compound succeeds trivially.  Note that neither the
         * compound__start nor the compound__done probe fires on this path.
         */
        if (args->array_len == 0) {
                resp->status = NFS4_OK;
                return;
        }

        ASSERT(exi == NULL);
        ASSERT(cr == NULL);

        cr = crget();
        ASSERT(cr != NULL);

        /*
         * A zero return means the credential could not be established:
         * drop the cred, report the failure on the transport and tell
         * the caller through *rv.
         */
        if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                crfree(cr);
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                svcerr_badcred(req->rq_xprt);
                if (rv != NULL)
                        *rv = 1;
                return;
        }
        /* One zeroed result slot per requested operation. */
        resp->array_len = args->array_len;
        resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
            KM_SLEEP);

        cs.basecr = cr;
        nsrv4 = nfs4_get_srv();

        DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
            COMPOUND4args *, args);

        /*
         * For now, NFS4 compound processing must be protected by
         * exported_lock because it can access more than one exportinfo
         * per compound and share/unshare can now change multiple
         * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
         * per proc (excluding public exinfo), and exi_count design
         * is sufficient to protect concurrent execution of NFS2/3
         * ops along with unexport.  This lock will be removed as
         * part of the NFSv4 phase 2 namespace redesign work.
         */
        rw_enter(&ne->exported_lock, RW_READER);

        /*
         * If this is the first compound we've seen, we need to start all
         * new instances' grace periods.
         */
        if (nsrv4->seen_first_compound == 0) {
                rfs4_grace_start_new(nsrv4);
                /*
                 * This must be set after rfs4_grace_start_new(), otherwise
                 * another thread could proceed past here before the former
                 * is finished.
                 */
                nsrv4->seen_first_compound = 1;
        }

        /* Run the ops in order until one of them clears cs.cont. */
        for (i = 0; i < args->array_len && cs.cont; i++) {
                nfs_argop4 *argop;
                nfs_resop4 *resop;
                uint_t op;

                argop = &args->array[i];
                resop = &resp->array[i];
                /* The result slot echoes the opcode of the request slot. */
                resop->resop = argop->argop;
                op = (uint_t)resop->resop;

                if (op < rfsv4disp_cnt) {
                        /*
                         * Count the individual ops here; NULL and COMPOUND
                         * are counted in common_dispatch()
                         */
                        rfsproccnt_v4_ptr[op].value.ui64++;

                        NFS4_DEBUG(rfs4_debug > 1,
                            (CE_NOTE, "Executing %s", rfs4_op_string[op]));
                        (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
                        NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
                            rfs4_op_string[op], *cs.statusp));
                        /* Any failure ends the compound. */
                        if (*cs.statusp != NFS4_OK)
                                cs.cont = FALSE;
                } else {
                        /*
                         * This is effectively dead code since XDR code
                         * will have already returned BADXDR if op doesn't
                         * decode to legal value.  This only done for a
                         * day when XDR code doesn't verify v4 opcodes.
                         */
                        op = OP_ILLEGAL;
                        rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

                        rfs4_op_illegal(argop, resop, req, &cs);
                        cs.cont = FALSE;
                }

                /*
                 * If not at last op, and if we are to stop, then
                 * compact the results array.  This keeps resp->array_len
                 * equal to the number of slots actually allocated, which
                 * is the size rfs4_compound_free() hands to kmem_free().
                 */
                if ((i + 1) < args->array_len && !cs.cont) {
                        nfs_resop4 *new_res = kmem_alloc(
                            (i+1) * sizeof (nfs_resop4), KM_SLEEP);
                        bcopy(resp->array,
                            new_res, (i+1) * sizeof (nfs_resop4));
                        kmem_free(resp->array,
                            args->array_len * sizeof (nfs_resop4));

                        resp->array_len =  i + 1;
                        resp->array = new_res;
                }
        }

        rw_exit(&ne->exported_lock);

        /*
         * clear exportinfo and vnode fields from compound_state before dtrace
         * probe, to avoid tracing residual values for path and share path.
         */
        if (cs.vp)
                VN_RELE(cs.vp);
        if (cs.saved_vp)
                VN_RELE(cs.saved_vp);
        cs.exi = cs.saved_exi = NULL;
        cs.vp = cs.saved_vp = NULL;

        DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
            COMPOUND4res *, resp);

        /* Release whatever the ops left behind in the compound state. */
        if (cs.saved_fh.nfs_fh4_val)
                kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

        if (cs.basecr)
                crfree(cs.basecr);
        if (cs.cr)
                crfree(cs.cr);
        /*
         * done with this compound request, free the label
         */

        if (req->rq_label != NULL) {
                kmem_free(req->rq_label, sizeof (bslabel_t));
                req->rq_label = NULL;
        }
}
5952 
5953 /*
5954  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5955  * XXX zero out the tag and array values. Need to investigate why the
5956  * XXX calls occur, but at least prevent the panic for now.
5957  */
5958 void
5959 rfs4_compound_free(COMPOUND4res *resp)
5960 {
5961         uint_t i;
5962 
5963         if (resp->tag.utf8string_val) {
5964                 UTF8STRING_FREE(resp->tag)
5965         }
5966 
5967         for (i = 0; i < resp->array_len; i++) {
5968                 nfs_resop4 *resop;
5969                 uint_t op;
5970 
5971                 resop = &resp->array[i];
5972                 op = (uint_t)resop->resop;
5973                 if (op < rfsv4disp_cnt) {
5974                         (*rfsv4disptab[op].dis_resfree)(resop);
5975                 }
5976         }
5977         if (resp->array != NULL) {
5978                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5979         }
5980 }
5981 
5982 /*
5983  * Process the value of the compound request rpc flags, as a bit-AND
5984  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5985  */
5986 void
5987 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5988 {
5989         int i;
5990         int flag = RPC_ALL;
5991 
5992         for (i = 0; flag && i < args->array_len; i++) {
5993                 uint_t op;
5994 
5995                 op = (uint_t)args->array[i].argop;
5996 
5997                 if (op < rfsv4disp_cnt)
5998                         flag &= rfsv4disptab[op].dis_flags;
5999                 else
6000                         flag = 0;
6001         }
6002         *flagp = flag;
6003 }
6004 
6005 nfsstat4
6006 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6007 {
6008         nfsstat4 e;
6009 
6010         rfs4_dbe_lock(cp->rc_dbe);
6011 
6012         if (cp->rc_sysidt != LM_NOSYSID) {
6013                 *sp = cp->rc_sysidt;
6014                 e = NFS4_OK;
6015 
6016         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6017                 *sp = cp->rc_sysidt;
6018                 e = NFS4_OK;
6019 
6020                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6021                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6022         } else
6023                 e = NFS4ERR_DELAY;
6024 
6025         rfs4_dbe_unlock(cp->rc_dbe);
6026         return (e);
6027 }
6028 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log one line through cmn_err() describing a record-lock
 * request.  `str' is a caller-supplied prefix, `operation' the fcntl
 * command and `flk' the lock description.  A zero l_len is printed as
 * all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op_name = "F_UNKNOWN";
        char *type_name = "F_UNKNOWN";
        longlong_t lk_start, lk_len;

        if (operation == F_GETLK)
                op_name = "F_GETLK";
        else if (operation == F_SETLK)
                op_name = "F_SETLK";
        else if (operation == F_SETLK_NBMAND)
                op_name = "F_SETLK_NBMAND";

        if (flk->l_type == F_UNLCK)
                type_name = "F_UNLCK";
        else if (flk->l_type == F_RDLCK)
                type_name = "F_RDLCK";
        else if (flk->l_type == F_WRLCK)
                type_name = "F_WRLCK";

        ASSERT(flk->l_whence == 0);

        lk_start = (longlong_t)flk->l_start;
        lk_len = flk->l_len ? (longlong_t)flk->l_len : ~0LL;

        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op_name, type_name, lk_start, lk_len, flk->l_pid);
}

#define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6065 
/*
 * Credential check hook.  Currently a stub: none of the arguments are
 * examined and TRUE is returned unconditionally.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
        return (TRUE);
}
6072 
6073 /*
6074  * Look up the pathname using the vp in cs as the directory vnode.
6075  * cs->vp will be the vnode for the file on success
6076  */
6077 
6078 static nfsstat4
6079 rfs4_lookup(component4 *component, struct svc_req *req,
6080     struct compound_state *cs)
6081 {
6082         char *nm;
6083         uint32_t len;
6084         nfsstat4 status;
6085         struct sockaddr *ca;
6086         char *name;
6087 
6088         if (cs->vp == NULL) {
6089                 return (NFS4ERR_NOFILEHANDLE);
6090         }
6091         if (cs->vp->v_type != VDIR) {
6092                 return (NFS4ERR_NOTDIR);
6093         }
6094 
6095         status = utf8_dir_verify(component);
6096         if (status != NFS4_OK)
6097                 return (status);
6098 
6099         nm = utf8_to_fn(component, &len, NULL);
6100         if (nm == NULL) {
6101                 return (NFS4ERR_INVAL);
6102         }
6103 
6104         if (len > MAXNAMELEN) {
6105                 kmem_free(nm, len);
6106                 return (NFS4ERR_NAMETOOLONG);
6107         }
6108 
6109         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6110         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6111             MAXPATHLEN + 1);
6112 
6113         if (name == NULL) {
6114                 kmem_free(nm, len);
6115                 return (NFS4ERR_INVAL);
6116         }
6117 
6118         status = do_rfs4_op_lookup(name, req, cs);
6119 
6120         if (name != nm)
6121                 kmem_free(name, MAXPATHLEN + 1);
6122 
6123         kmem_free(nm, len);
6124 
6125         return (status);
6126 }
6127 
6128 static nfsstat4
6129 rfs4_lookupfile(component4 *component, struct svc_req *req,
6130     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6131 {
6132         nfsstat4 status;
6133         vnode_t *dvp = cs->vp;
6134         vattr_t bva, ava, fva;
6135         int error;
6136 
6137         /* Get "before" change value */
6138         bva.va_mask = AT_CTIME|AT_SEQ;
6139         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6140         if (error)
6141                 return (puterrno4(error));
6142 
6143         /* rfs4_lookup may VN_RELE directory */
6144         VN_HOLD(dvp);
6145 
6146         status = rfs4_lookup(component, req, cs);
6147         if (status != NFS4_OK) {
6148                 VN_RELE(dvp);
6149                 return (status);
6150         }
6151 
6152         /*
6153          * Get "after" change value, if it fails, simply return the
6154          * before value.
6155          */
6156         ava.va_mask = AT_CTIME|AT_SEQ;
6157         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6158                 ava.va_ctime = bva.va_ctime;
6159                 ava.va_seq = 0;
6160         }
6161         VN_RELE(dvp);
6162 
6163         /*
6164          * Validate the file is a file
6165          */
6166         fva.va_mask = AT_TYPE|AT_MODE;
6167         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6168         if (error)
6169                 return (puterrno4(error));
6170 
6171         if (fva.va_type != VREG) {
6172                 if (fva.va_type == VDIR)
6173                         return (NFS4ERR_ISDIR);
6174                 if (fva.va_type == VLNK)
6175                         return (NFS4ERR_SYMLINK);
6176                 return (NFS4ERR_INVAL);
6177         }
6178 
6179         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6180         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6181 
6182         /*
6183          * It is undefined if VOP_LOOKUP will change va_seq, so
6184          * cinfo.atomic = TRUE only if we have
6185          * non-zero va_seq's, and they have not changed.
6186          */
6187         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6188                 cinfo->atomic = TRUE;
6189         else
6190                 cinfo->atomic = FALSE;
6191 
6192         /* Check for mandatory locking */
6193         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6194         return (check_open_access(access, cs, req));
6195 }
6196 
6197 static nfsstat4
6198 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6199     cred_t *cr, vnode_t **vpp, bool_t *created)
6200 {
6201         int error;
6202         nfsstat4 status = NFS4_OK;
6203         vattr_t va;
6204 
6205 tryagain:
6206 
6207         /*
6208          * The file open mode used is VWRITE.  If the client needs
6209          * some other semantic, then it should do the access checking
6210          * itself.  It would have been nice to have the file open mode
6211          * passed as part of the arguments.
6212          */
6213 
6214         *created = TRUE;
6215         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6216 
6217         if (error) {
6218                 *created = FALSE;
6219 
6220                 /*
6221                  * If we got something other than file already exists
6222                  * then just return this error.  Otherwise, we got
6223                  * EEXIST.  If we were doing a GUARDED create, then
6224                  * just return this error.  Otherwise, we need to
6225                  * make sure that this wasn't a duplicate of an
6226                  * exclusive create request.
6227                  *
6228                  * The assumption is made that a non-exclusive create
6229                  * request will never return EEXIST.
6230                  */
6231 
6232                 if (error != EEXIST || mode == GUARDED4) {
6233                         status = puterrno4(error);
6234                         return (status);
6235                 }
6236                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6237                     NULL, NULL, NULL);
6238 
6239                 if (error) {
6240                         /*
6241                          * We couldn't find the file that we thought that
6242                          * we just created.  So, we'll just try creating
6243                          * it again.
6244                          */
6245                         if (error == ENOENT)
6246                                 goto tryagain;
6247 
6248                         status = puterrno4(error);
6249                         return (status);
6250                 }
6251 
6252                 if (mode == UNCHECKED4) {
6253                         /* existing object must be regular file */
6254                         if ((*vpp)->v_type != VREG) {
6255                                 if ((*vpp)->v_type == VDIR)
6256                                         status = NFS4ERR_ISDIR;
6257                                 else if ((*vpp)->v_type == VLNK)
6258                                         status = NFS4ERR_SYMLINK;
6259                                 else
6260                                         status = NFS4ERR_INVAL;
6261                                 VN_RELE(*vpp);
6262                                 return (status);
6263                         }
6264 
6265                         return (NFS4_OK);
6266                 }
6267 
6268                 /* Check for duplicate request */
6269                 va.va_mask = AT_MTIME;
6270                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6271                 if (!error) {
6272                         /* We found the file */
6273                         const timestruc_t *mtime = &vap->va_mtime;
6274 
6275                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6276                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6277                                 /* but its not our creation */
6278                                 VN_RELE(*vpp);
6279                                 return (NFS4ERR_EXIST);
6280                         }
6281                         *created = TRUE; /* retrans of create == created */
6282                         return (NFS4_OK);
6283                 }
6284                 VN_RELE(*vpp);
6285                 return (NFS4ERR_EXIST);
6286         }
6287 
6288         return (NFS4_OK);
6289 }
6290 
6291 static nfsstat4
6292 check_open_access(uint32_t access, struct compound_state *cs,
6293     struct svc_req *req)
6294 {
6295         int error;
6296         vnode_t *vp;
6297         bool_t readonly;
6298         cred_t *cr = cs->cr;
6299 
6300         /* For now we don't allow mandatory locking as per V2/V3 */
6301         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6302                 return (NFS4ERR_ACCESS);
6303         }
6304 
6305         vp = cs->vp;
6306         ASSERT(cr != NULL && vp->v_type == VREG);
6307 
6308         /*
6309          * If the file system is exported read only and we are trying
6310          * to open for write, then return NFS4ERR_ROFS
6311          */
6312 
6313         readonly = rdonly4(req, cs);
6314 
6315         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6316                 return (NFS4ERR_ROFS);
6317 
6318         if (access & OPEN4_SHARE_ACCESS_READ) {
6319                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6320                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6321                         return (NFS4ERR_ACCESS);
6322                 }
6323         }
6324 
6325         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6326                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6327                 if (error)
6328                         return (NFS4ERR_ACCESS);
6329         }
6330 
6331         return (NFS4_OK);
6332 }
6333 
6334 static nfsstat4
6335 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6336     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6337 {
6338         struct nfs4_svgetit_arg sarg;
6339         struct nfs4_ntov_table ntov;
6340 
6341         bool_t ntov_table_init = FALSE;
6342         struct statvfs64 sb;
6343         nfsstat4 status;
6344         vnode_t *vp;
6345         vattr_t bva, ava, iva, cva, *vap;
6346         vnode_t *dvp;
6347         timespec32_t *mtime;
6348         char *nm = NULL;
6349         uint_t buflen;
6350         bool_t created;
6351         bool_t setsize = FALSE;
6352         len_t reqsize;
6353         int error;
6354         bool_t trunc;
6355         caller_context_t ct;
6356         component4 *component;
6357         bslabel_t *clabel;
6358         struct sockaddr *ca;
6359         char *name = NULL;
6360 
6361         sarg.sbp = &sb;
6362         sarg.is_referral = B_FALSE;
6363 
6364         dvp = cs->vp;
6365 
6366         /* Check if the file system is read only */
6367         if (rdonly4(req, cs))
6368                 return (NFS4ERR_ROFS);
6369 
6370         /* check the label of including directory */
6371         if (is_system_labeled()) {
6372                 ASSERT(req->rq_label != NULL);
6373                 clabel = req->rq_label;
6374                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6375                     "got client label from request(1)",
6376                     struct svc_req *, req);
6377                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6378                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6379                             cs->exi)) {
6380                                 return (NFS4ERR_ACCESS);
6381                         }
6382                 }
6383         }
6384 
6385         /*
6386          * Get the last component of path name in nm. cs will reference
6387          * the including directory on success.
6388          */
6389         component = &args->open_claim4_u.file;
6390         status = utf8_dir_verify(component);
6391         if (status != NFS4_OK)
6392                 return (status);
6393 
6394         nm = utf8_to_fn(component, &buflen, NULL);
6395 
6396         if (nm == NULL)
6397                 return (NFS4ERR_RESOURCE);
6398 
6399         if (buflen > MAXNAMELEN) {
6400                 kmem_free(nm, buflen);
6401                 return (NFS4ERR_NAMETOOLONG);
6402         }
6403 
6404         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6405         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6406         if (error) {
6407                 kmem_free(nm, buflen);
6408                 return (puterrno4(error));
6409         }
6410 
6411         if (bva.va_type != VDIR) {
6412                 kmem_free(nm, buflen);
6413                 return (NFS4ERR_NOTDIR);
6414         }
6415 
6416         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6417 
6418         switch (args->mode) {
6419         case GUARDED4:
6420                 /*FALLTHROUGH*/
6421         case UNCHECKED4:
6422                 nfs4_ntov_table_init(&ntov);
6423                 ntov_table_init = TRUE;
6424 
6425                 *attrset = 0;
6426                 status = do_rfs4_set_attrs(attrset,
6427                     &args->createhow4_u.createattrs,
6428                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6429 
6430                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6431                     sarg.vap->va_type != VREG) {
6432                         if (sarg.vap->va_type == VDIR)
6433                                 status = NFS4ERR_ISDIR;
6434                         else if (sarg.vap->va_type == VLNK)
6435                                 status = NFS4ERR_SYMLINK;
6436                         else
6437                                 status = NFS4ERR_INVAL;
6438                 }
6439 
6440                 if (status != NFS4_OK) {
6441                         kmem_free(nm, buflen);
6442                         nfs4_ntov_table_free(&ntov, &sarg);
6443                         *attrset = 0;
6444                         return (status);
6445                 }
6446 
6447                 vap = sarg.vap;
6448                 vap->va_type = VREG;
6449                 vap->va_mask |= AT_TYPE;
6450 
6451                 if ((vap->va_mask & AT_MODE) == 0) {
6452                         vap->va_mask |= AT_MODE;
6453                         vap->va_mode = (mode_t)0600;
6454                 }
6455 
6456                 if (vap->va_mask & AT_SIZE) {
6457 
6458                         /* Disallow create with a non-zero size */
6459 
6460                         if ((reqsize = sarg.vap->va_size) != 0) {
6461                                 kmem_free(nm, buflen);
6462                                 nfs4_ntov_table_free(&ntov, &sarg);
6463                                 *attrset = 0;
6464                                 return (NFS4ERR_INVAL);
6465                         }
6466                         setsize = TRUE;
6467                 }
6468                 break;
6469 
6470         case EXCLUSIVE4:
6471                 /* prohibit EXCL create of named attributes */
6472                 if (dvp->v_flag & V_XATTRDIR) {
6473                         kmem_free(nm, buflen);
6474                         *attrset = 0;
6475                         return (NFS4ERR_INVAL);
6476                 }
6477 
6478                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6479                 cva.va_type = VREG;
6480                 /*
6481                  * Ensure no time overflows. Assumes underlying
6482                  * filesystem supports at least 32 bits.
6483                  * Truncate nsec to usec resolution to allow valid
6484                  * compares even if the underlying filesystem truncates.
6485                  */
6486                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6487                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6488                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6489                 cva.va_mode = (mode_t)0;
6490                 vap = &cva;
6491 
6492                 /*
6493                  * For EXCL create, attrset is set to the server attr
6494                  * used to cache the client's verifier.
6495                  */
6496                 *attrset = FATTR4_TIME_MODIFY_MASK;
6497                 break;
6498         }
6499 
6500         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6501         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6502             MAXPATHLEN  + 1);
6503 
6504         if (name == NULL) {
6505                 kmem_free(nm, buflen);
6506                 return (NFS4ERR_SERVERFAULT);
6507         }
6508 
6509         status = create_vnode(dvp, name, vap, args->mode,
6510             cs->cr, &vp, &created);
6511         if (nm != name)
6512                 kmem_free(name, MAXPATHLEN + 1);
6513         kmem_free(nm, buflen);
6514 
6515         if (status != NFS4_OK) {
6516                 if (ntov_table_init)
6517                         nfs4_ntov_table_free(&ntov, &sarg);
6518                 *attrset = 0;
6519                 return (status);
6520         }
6521 
6522         trunc = (setsize && !created);
6523 
6524         if (args->mode != EXCLUSIVE4) {
6525                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6526 
6527                 /*
6528                  * True verification that object was created with correct
6529                  * attrs is impossible.  The attrs could have been changed
6530                  * immediately after object creation.  If attributes did
6531                  * not verify, the only recourse for the server is to
6532                  * destroy the object.  Maybe if some attrs (like gid)
6533                  * are set incorrectly, the object should be destroyed;
6534                  * however, seems bad as a default policy.  Do we really
6535                  * want to destroy an object over one of the times not
6536                  * verifying correctly?  For these reasons, the server
6537                  * currently sets bits in attrset for createattrs
6538                  * that were set; however, no verification is done.
6539                  *
6540                  * vmask_to_nmask accounts for vattr bits set on create
6541                  *      [do_rfs4_set_attrs() only sets resp bits for
6542                  *       non-vattr/vfs bits.]
6543                  * Mask off any bits we set by default so as not to return
6544                  * more attrset bits than were requested in createattrs
6545                  */
6546                 if (created) {
6547                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6548                         *attrset &= createmask;
6549                 } else {
6550                         /*
6551                          * We did not create the vnode (we tried but it
6552                          * already existed).  In this case, the only createattr
6553                          * that the spec allows the server to set is size,
6554                          * and even then, it can only be set if it is 0.
6555                          */
6556                         *attrset = 0;
6557                         if (trunc)
6558                                 *attrset = FATTR4_SIZE_MASK;
6559                 }
6560         }
6561         if (ntov_table_init)
6562                 nfs4_ntov_table_free(&ntov, &sarg);
6563 
6564         /*
6565          * Get the initial "after" sequence number, if it fails,
6566          * set to zero, time to before.
6567          */
6568         iva.va_mask = AT_CTIME|AT_SEQ;
6569         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6570                 iva.va_seq = 0;
6571                 iva.va_ctime = bva.va_ctime;
6572         }
6573 
6574         /*
6575          * create_vnode attempts to create the file exclusive,
6576          * if it already exists the VOP_CREATE will fail and
6577          * may not increase va_seq. It is atomic if
6578          * we haven't changed the directory, but if it has changed
6579          * we don't know what changed it.
6580          */
6581         if (!created) {
6582                 if (bva.va_seq && iva.va_seq &&
6583                     bva.va_seq == iva.va_seq)
6584                         cinfo->atomic = TRUE;
6585                 else
6586                         cinfo->atomic = FALSE;
6587                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6588         } else {
6589                 /*
6590                  * The entry was created, we need to sync the
6591                  * directory metadata.
6592                  */
6593                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6594 
6595                 /*
6596                  * Get "after" change value, if it fails, simply return the
6597                  * before value.
6598                  */
6599                 ava.va_mask = AT_CTIME|AT_SEQ;
6600                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6601                         ava.va_ctime = bva.va_ctime;
6602                         ava.va_seq = 0;
6603                 }
6604 
6605                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6606 
6607                 /*
6608                  * The cinfo->atomic = TRUE only if we have
6609                  * non-zero va_seq's, and it has incremented by exactly one
6610                  * during the create_vnode and it didn't
6611                  * change during the VOP_FSYNC.
6612                  */
6613                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6614                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6615                         cinfo->atomic = TRUE;
6616                 else
6617                         cinfo->atomic = FALSE;
6618         }
6619 
6620         /* Check for mandatory locking and that the size gets set. */
6621         cva.va_mask = AT_MODE;
6622         if (setsize)
6623                 cva.va_mask |= AT_SIZE;
6624 
6625         /* Assume the worst */
6626         cs->mandlock = TRUE;
6627 
6628         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6629                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6630 
6631                 /*
6632                  * Truncate the file if necessary; this would be
6633                  * the case for create over an existing file.
6634                  */
6635 
6636                 if (trunc) {
6637                         int in_crit = 0;
6638                         rfs4_file_t *fp;
6639                         nfs4_srv_t *nsrv4;
6640                         bool_t create = FALSE;
6641 
6642                         /*
6643                          * We are writing over an existing file.
6644                          * Check to see if we need to recall a delegation.
6645                          */
6646                         nsrv4 = nfs4_get_srv();
6647                         rfs4_hold_deleg_policy(nsrv4);
6648                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6649                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6650                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6651                                         rfs4_file_rele(fp);
6652                                         rfs4_rele_deleg_policy(nsrv4);
6653                                         VN_RELE(vp);
6654                                         *attrset = 0;
6655                                         return (NFS4ERR_DELAY);
6656                                 }
6657                                 rfs4_file_rele(fp);
6658                         }
6659                         rfs4_rele_deleg_policy(nsrv4);
6660 
6661                         if (nbl_need_check(vp)) {
6662                                 in_crit = 1;
6663 
6664                                 ASSERT(reqsize == 0);
6665 
6666                                 nbl_start_crit(vp, RW_READER);
6667                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6668                                     cva.va_size, 0, NULL)) {
6669                                         in_crit = 0;
6670                                         nbl_end_crit(vp);
6671                                         VN_RELE(vp);
6672                                         *attrset = 0;
6673                                         return (NFS4ERR_ACCESS);
6674                                 }
6675                         }
6676                         ct.cc_sysid = 0;
6677                         ct.cc_pid = 0;
6678                         ct.cc_caller_id = nfs4_srv_caller_id;
6679                         ct.cc_flags = CC_DONTBLOCK;
6680 
6681                         cva.va_mask = AT_SIZE;
6682                         cva.va_size = reqsize;
6683                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6684                         if (in_crit)
6685                                 nbl_end_crit(vp);
6686                 }
6687         }
6688 
6689         error = makefh4(&cs->fh, vp, cs->exi);
6690 
6691         /*
6692          * Force modified data and metadata out to stable storage.
6693          */
6694         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6695 
6696         if (error) {
6697                 VN_RELE(vp);
6698                 *attrset = 0;
6699                 return (puterrno4(error));
6700         }
6701 
6702         /* if parent dir is attrdir, set namedattr fh flag */
6703         if (dvp->v_flag & V_XATTRDIR)
6704                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6705 
6706         if (cs->vp)
6707                 VN_RELE(cs->vp);
6708 
6709         cs->vp = vp;
6710 
6711         /*
6712          * if we did not create the file, we will need to check
6713          * the access bits on the file
6714          */
6715 
6716         if (!created) {
6717                 if (setsize)
6718                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6719                 status = check_open_access(args->share_access, cs, req);
6720                 if (status != NFS4_OK)
6721                         *attrset = 0;
6722         }
6723         return (status);
6724 }
6725 
/*
 * rfs4_do_open - common back end shared by the OPEN claim handlers.
 *
 * Works on the file already installed in cs->vp / cs->fh.  The steps are:
 *   1. find the rfs4_file_t for the vnode and the rfs4_state_t for the
 *      <open owner, file> pair (both lookups are passed a create flag
 *      initialized to TRUE);
 *   2. obtain the client's sysid;
 *   3. if this open adds share access/deny bits, apply them via rfs4_share();
 *   4. if rfs4_check_recall() reports a conflict, recall the delegation,
 *      wait NFS4_DELEGATION_CONFLICT_DELAY and check once more;
 *   5. call VOP_OPEN() for a first open, or vn_open_upgrade() otherwise;
 *   6. fold the new access/deny bits into the state and file structs;
 *   7. try to grant a delegation, advance the stateid and fill in resp.
 *
 * Arguments:
 *   cs         compound state; cs->vp, cs->fh and cs->cr are used and
 *              cs->deleg is set on success
 *   req        currently unused
 *   oo         open owner issuing the OPEN
 *   deleg      delegation request type handed to rfs4_grant_delegation()
 *   access     OPEN4_SHARE_ACCESS_* bits requested
 *   deny       OPEN4_SHARE_DENY_* bits requested
 *   resp       OPEN result; status, stateid and delegation are filled in
 *   deleg_cur  non-zero to skip VOP_OPEN() and only do the open upgrade
 *
 * The outcome is reported through resp->status.  On every failure path
 * after the state struct has been found, a state struct that was created
 * by this call (screate still TRUE) is closed again before it is released.
 */
/*ARGSUSED*/
static void
rfs4_do_open(struct compound_state *cs, struct svc_req *req,
    rfs4_openowner_t *oo, delegreq_t deleg,
    uint32_t access, uint32_t deny,
    OPEN4res *resp, int deleg_cur)
{
        /* XXX Currently not using req  */
        rfs4_state_t *sp;
        rfs4_file_t *fp;
        bool_t screate = TRUE;
        bool_t fcreate = TRUE;
        uint32_t open_a, share_a;
        uint32_t open_d, share_d;
        rfs4_deleg_state_t *dsp;
        sysid_t sysid;
        nfsstat4 status;
        caller_context_t ct;
        int fflags = 0;
        int recall = 0;
        int err;
        int first_open;

        /* get the file struct and hold a lock on it during initial open */
        fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
        if (fp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
                return;
        }

        sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
        if (sp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
                /* No need to keep any reference */
                rw_exit(&fp->rf_file_rwlock);
                rfs4_file_rele(fp);
                return;
        }

        /*
         * NOTE(review): rf_file_rwlock is explicitly dropped only on the
         * sp == NULL path above; presumably it is released elsewhere once
         * the state lookup succeeds -- confirm against the state code.
         */

        /* try to get the sysid before continuing */
        if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
                resp->status = status;
                rfs4_file_rele(fp);
                /* Not a fully formed open; "close" it */
                if (screate == TRUE)
                        rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                rfs4_state_rele(sp);
                return;
        }

        /* Calculate the fflags for this OPEN. */
        if (access & OPEN4_SHARE_ACCESS_READ)
                fflags |= FREAD;
        if (access & OPEN4_SHARE_ACCESS_WRITE)
                fflags |= FWRITE;

        rfs4_dbe_lock(sp->rs_dbe);

        /*
         * Calculate the new deny and access mode that this open is adding to
         * the file for this open owner;
         */
        open_d = (deny & ~sp->rs_open_deny);
        open_a = (access & ~sp->rs_open_access);

        /*
         * Calculate the new share access and share deny modes that this open
         * is adding to the file for this open owner;
         */
        share_a = (access & ~sp->rs_share_access);
        share_d = (deny & ~sp->rs_share_deny);

        /*
         * Sampled now, before the state lock can be dropped for a
         * delegation recall below.
         */
        first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;

        /*
         * Check to see the client has already sent an open for this
         * open owner on this file with the same share/deny modes.
         * If so, we don't need to check for a conflict and we don't
         * need to add another shrlock.  If not, then we need to
         * check for conflicts in deny and access before checking for
         * conflicts in delegation.  We don't want to recall a
         * delegation based on an open that will eventually fail based
         * on shares modes.
         */

        if (share_a || share_d) {
                if ((err = rfs4_share(sp, access, deny)) != 0) {
                        rfs4_dbe_unlock(sp->rs_dbe);
                        resp->status = err;

                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        return;
                }
        }

        /* Lock order from here on: state dbe first, then file dbe. */
        rfs4_dbe_lock(fp->rf_dbe);

        /*
         * Check to see if this file is delegated and if so, if a
         * recall needs to be done.
         */
        if (rfs4_check_recall(sp, access)) {
                /* Both dbe locks are dropped across the recall and delay. */
                rfs4_dbe_unlock(fp->rf_dbe);
                rfs4_dbe_unlock(sp->rs_dbe);
                rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
                delay(NFS4_DELEGATION_CONFLICT_DELAY);
                rfs4_dbe_lock(sp->rs_dbe);

                /* if state closed while lock was dropped */
                if (sp->rs_closed) {
                        /* Undo the share taken above, if one was taken. */
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_OLD_STATEID;
                        return;
                }

                rfs4_dbe_lock(fp->rf_dbe);
                /* Let's see if the delegation was returned */
                if (rfs4_check_recall(sp, access)) {
                        /* Still conflicting: back out and ask for a retry. */
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        rfs4_update_lease(sp->rs_owner->ro_client);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_DELAY;
                        return;
                }
        }
        /*
         * the share check passed and any delegation conflict has been
         * taken care of, now call vop_open.
         * if this is the first open then call vop_open with fflags.
         * if not, call vn_open_upgrade with just the upgrade flags.
         *
         * if the file has been opened already, it will have the current
         * access mode in the state struct.  if it has no share access, then
         * this is a new open.
         *
         * However, if this is open with CLAIM_DELEGATE_CUR, then don't
         * call VOP_OPEN(), just do the open upgrade.
         */
        if (first_open && !deleg_cur) {
                ct.cc_sysid = sysid;
                ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
                ct.cc_caller_id = nfs4_srv_caller_id;
                ct.cc_flags = CC_DONTBLOCK;
                err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
                if (err) {
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        /* check if a monitor detected a delegation conflict */
                        if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
                                resp->status = NFS4ERR_DELAY;
                        else
                                resp->status = NFS4ERR_SERVERFAULT;
                        return;
                }
        } else { /* open upgrade */
                /*
                 * calculate the fflags for the new mode that is being added
                 * by this upgrade.
                 */
                fflags = 0;
                if (open_a & OPEN4_SHARE_ACCESS_READ)
                        fflags |= FREAD;
                if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                        fflags |= FWRITE;
                vn_open_upgrade(cs->vp, fflags);
        }

        /* Record the (possibly widened) modes in the state struct. */
        sp->rs_open_access |= access;
        sp->rs_open_deny |= deny;

        /* Only bits newly added by this open bump the per-file counters. */
        if (open_d & OPEN4_SHARE_DENY_READ)
                fp->rf_deny_read++;
        if (open_d & OPEN4_SHARE_DENY_WRITE)
                fp->rf_deny_write++;
        fp->rf_share_deny |= deny;

        if (open_a & OPEN4_SHARE_ACCESS_READ)
                fp->rf_access_read++;
        if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                fp->rf_access_write++;
        fp->rf_share_access |= access;

        /*
         * Check for delegation here. if the deleg argument is not
         * DELEG_ANY, then this is a reclaim from a client and
         * we must honor the delegation requested. If necessary we can
         * set the recall flag.
         */

        dsp = rfs4_grant_delegation(deleg, sp, &recall);

        /* Remember whether the file now carries a write delegation. */
        cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);

        next_stateid(&sp->rs_stateid);

        resp->stateid = sp->rs_stateid.stateid;

        rfs4_dbe_unlock(fp->rf_dbe);
        rfs4_dbe_unlock(sp->rs_dbe);

        /* A delegation was granted: describe it in the reply. */
        if (dsp) {
                rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
                rfs4_deleg_state_rele(dsp);
        }

        rfs4_file_rele(fp);
        rfs4_state_rele(sp);

        resp->status = NFS4_OK;
}
6964 
6965 /*ARGSUSED*/
6966 static void
6967 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6968     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6969 {
6970         change_info4 *cinfo = &resp->cinfo;
6971         bitmap4 *attrset = &resp->attrset;
6972 
6973         if (args->opentype == OPEN4_NOCREATE)
6974                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6975                     req, cs, args->share_access, cinfo);
6976         else {
6977                 /* inhibit delegation grants during exclusive create */
6978 
6979                 if (args->mode == EXCLUSIVE4)
6980                         rfs4_disable_delegation();
6981 
6982                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6983                     oo->ro_client->rc_clientid);
6984         }
6985 
6986         if (resp->status == NFS4_OK) {
6987 
6988                 /* cs->vp cs->fh now reference the desired file */
6989 
6990                 rfs4_do_open(cs, req, oo,
6991                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6992                     args->share_access, args->share_deny, resp, 0);
6993 
6994                 /*
6995                  * If rfs4_createfile set attrset, we must
6996                  * clear this attrset before the response is copied.
6997                  */
6998                 if (resp->status != NFS4_OK && resp->attrset) {
6999                         resp->attrset = 0;
7000                 }
7001         }
7002         else
7003                 *cs->statusp = resp->status;
7004 
7005         if (args->mode == EXCLUSIVE4)
7006                 rfs4_enable_delegation();
7007 }
7008 
7009 /*ARGSUSED*/
7010 static void
7011 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7012     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7013 {
7014         change_info4 *cinfo = &resp->cinfo;
7015         vattr_t va;
7016         vtype_t v_type = cs->vp->v_type;
7017         int error = 0;
7018 
7019         /* Verify that we have a regular file */
7020         if (v_type != VREG) {
7021                 if (v_type == VDIR)
7022                         resp->status = NFS4ERR_ISDIR;
7023                 else if (v_type == VLNK)
7024                         resp->status = NFS4ERR_SYMLINK;
7025                 else
7026                         resp->status = NFS4ERR_INVAL;
7027                 return;
7028         }
7029 
7030         va.va_mask = AT_MODE|AT_UID;
7031         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7032         if (error) {
7033                 resp->status = puterrno4(error);
7034                 return;
7035         }
7036 
7037         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7038 
7039         /*
7040          * Check if we have access to the file, Note the the file
7041          * could have originally been open UNCHECKED or GUARDED
7042          * with mode bits that will now fail, but there is nothing
7043          * we can really do about that except in the case that the
7044          * owner of the file is the one requesting the open.
7045          */
7046         if (crgetuid(cs->cr) != va.va_uid) {
7047                 resp->status = check_open_access(args->share_access, cs, req);
7048                 if (resp->status != NFS4_OK) {
7049                         return;
7050                 }
7051         }
7052 
7053         /*
7054          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7055          */
7056         cinfo->before = 0;
7057         cinfo->after = 0;
7058         cinfo->atomic = FALSE;
7059 
7060         rfs4_do_open(cs, req, oo,
7061             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7062             args->share_access, args->share_deny, resp, 0);
7063 }
7064 
7065 static void
7066 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7067     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7068 {
7069         int error;
7070         nfsstat4 status;
7071         stateid4 stateid =
7072             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7073         rfs4_deleg_state_t *dsp;
7074 
7075         /*
7076          * Find the state info from the stateid and confirm that the
7077          * file is delegated.  If the state openowner is the same as
7078          * the supplied openowner we're done. If not, get the file
7079          * info from the found state info. Use that file info to
7080          * create the state for this lock owner. Note solaris doen't
7081          * really need the pathname to find the file. We may want to
7082          * lookup the pathname and make sure that the vp exist and
7083          * matches the vp in the file structure. However it is
7084          * possible that the pathname nolonger exists (local process
7085          * unlinks the file), so this may not be that useful.
7086          */
7087 
7088         status = rfs4_get_deleg_state(&stateid, &dsp);
7089         if (status != NFS4_OK) {
7090                 resp->status = status;
7091                 return;
7092         }
7093 
7094         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7095 
7096         /*
7097          * New lock owner, create state. Since this was probably called
7098          * in response to a CB_RECALL we set deleg to DELEG_NONE
7099          */
7100 
7101         ASSERT(cs->vp != NULL);
7102         VN_RELE(cs->vp);
7103         VN_HOLD(dsp->rds_finfo->rf_vp);
7104         cs->vp = dsp->rds_finfo->rf_vp;
7105 
7106         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7107                 rfs4_deleg_state_rele(dsp);
7108                 *cs->statusp = resp->status = puterrno4(error);
7109                 return;
7110         }
7111 
7112         /* Mark progress for delegation returns */
7113         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7114         rfs4_deleg_state_rele(dsp);
7115         rfs4_do_open(cs, req, oo, DELEG_NONE,
7116             args->share_access, args->share_deny, resp, 1);
7117 }
7118 
7119 /*ARGSUSED*/
7120 static void
7121 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7122     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7123 {
7124         /*
7125          * Lookup the pathname, it must already exist since this file
7126          * was delegated.
7127          *
7128          * Find the file and state info for this vp and open owner pair.
7129          *      check that they are in fact delegated.
7130          *      check that the state access and deny modes are the same.
7131          *
7132          * Return the delgation possibly seting the recall flag.
7133          */
7134         rfs4_file_t *fp;
7135         rfs4_state_t *sp;
7136         bool_t create = FALSE;
7137         bool_t dcreate = FALSE;
7138         rfs4_deleg_state_t *dsp;
7139         nfsace4 *ace;
7140 
7141         /* Note we ignore oflags */
7142         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7143             req, cs, args->share_access, &resp->cinfo);
7144 
7145         if (resp->status != NFS4_OK) {
7146                 return;
7147         }
7148 
7149         /* get the file struct and hold a lock on it during initial open */
7150         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7151         if (fp == NULL) {
7152                 resp->status = NFS4ERR_RESOURCE;
7153                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7154                 return;
7155         }
7156 
7157         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7158         if (sp == NULL) {
7159                 resp->status = NFS4ERR_SERVERFAULT;
7160                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7161                 rw_exit(&fp->rf_file_rwlock);
7162                 rfs4_file_rele(fp);
7163                 return;
7164         }
7165 
7166         rfs4_dbe_lock(sp->rs_dbe);
7167         rfs4_dbe_lock(fp->rf_dbe);
7168         if (args->share_access != sp->rs_share_access ||
7169             args->share_deny != sp->rs_share_deny ||
7170             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7171                 NFS4_DEBUG(rfs4_debug,
7172                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7173                 rfs4_dbe_unlock(fp->rf_dbe);
7174                 rfs4_dbe_unlock(sp->rs_dbe);
7175                 rfs4_file_rele(fp);
7176                 rfs4_state_rele(sp);
7177                 resp->status = NFS4ERR_SERVERFAULT;
7178                 return;
7179         }
7180         rfs4_dbe_unlock(fp->rf_dbe);
7181         rfs4_dbe_unlock(sp->rs_dbe);
7182 
7183         dsp = rfs4_finddeleg(sp, &dcreate);
7184         if (dsp == NULL) {
7185                 rfs4_state_rele(sp);
7186                 rfs4_file_rele(fp);
7187                 resp->status = NFS4ERR_SERVERFAULT;
7188                 return;
7189         }
7190 
7191         next_stateid(&sp->rs_stateid);
7192 
7193         resp->stateid = sp->rs_stateid.stateid;
7194 
7195         resp->delegation.delegation_type = dsp->rds_dtype;
7196 
7197         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7198                 open_read_delegation4 *rv =
7199                     &resp->delegation.open_delegation4_u.read;
7200 
7201                 rv->stateid = dsp->rds_delegid.stateid;
7202                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7203                 ace = &rv->permissions;
7204         } else {
7205                 open_write_delegation4 *rv =
7206                     &resp->delegation.open_delegation4_u.write;
7207 
7208                 rv->stateid = dsp->rds_delegid.stateid;
7209                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7210                 ace = &rv->permissions;
7211                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7212                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7213         }
7214 
7215         /* XXX For now */
7216         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7217         ace->flag = 0;
7218         ace->access_mask = 0;
7219         ace->who.utf8string_len = 0;
7220         ace->who.utf8string_val = 0;
7221 
7222         rfs4_deleg_state_rele(dsp);
7223         rfs4_state_rele(sp);
7224         rfs4_file_rele(fp);
7225 }
7226 
/*
 * Outcome of a sequence id check, as returned by rfs4_check_seqid() and
 * its per-owner wrappers.
 */
typedef enum {
        NFS4_CHKSEQ_OKAY = 0,   /* request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY = 1, /* same seqid and same op as the last reply */
        NFS4_CHKSEQ_BAD = 2     /* any other seqid, or same seqid/other op */
} rfs4_chkseq_t;
7232 
7233 /*
7234  * Generic function for sequence number checks.
7235  */
7236 static rfs4_chkseq_t
7237 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7238     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7239 {
7240         /* Same sequence ids and matching operations? */
7241         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7242                 if (copyres == TRUE) {
7243                         rfs4_free_reply(resop);
7244                         rfs4_copy_reply(resop, lastop);
7245                 }
7246                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7247                     "Replayed SEQID %d\n", seqid));
7248                 return (NFS4_CHKSEQ_REPLAY);
7249         }
7250 
7251         /* If the incoming sequence is not the next expected then it is bad */
7252         if (rqst_seq != seqid + 1) {
7253                 if (rqst_seq == seqid) {
7254                         NFS4_DEBUG(rfs4_debug,
7255                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7256                             "but last op was %d current op is %d\n",
7257                             lastop->resop, resop->resop));
7258                         return (NFS4_CHKSEQ_BAD);
7259                 }
7260                 NFS4_DEBUG(rfs4_debug,
7261                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7262                     rqst_seq, seqid));
7263                 return (NFS4_CHKSEQ_BAD);
7264         }
7265 
7266         /* Everything okay -- next expected */
7267         return (NFS4_CHKSEQ_OKAY);
7268 }
7269 
7270 
7271 static rfs4_chkseq_t
7272 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7273 {
7274         rfs4_chkseq_t rc;
7275 
7276         rfs4_dbe_lock(op->ro_dbe);
7277         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7278             TRUE);
7279         rfs4_dbe_unlock(op->ro_dbe);
7280 
7281         if (rc == NFS4_CHKSEQ_OKAY)
7282                 rfs4_update_lease(op->ro_client);
7283 
7284         return (rc);
7285 }
7286 
7287 static rfs4_chkseq_t
7288 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7289 {
7290         rfs4_chkseq_t rc;
7291 
7292         rfs4_dbe_lock(op->ro_dbe);
7293         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7294             olo_seqid, resop, FALSE);
7295         rfs4_dbe_unlock(op->ro_dbe);
7296 
7297         return (rc);
7298 }
7299 
7300 static rfs4_chkseq_t
7301 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7302 {
7303         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7304 
7305         rfs4_dbe_lock(lsp->rls_dbe);
7306         if (!lsp->rls_skip_seqid_check)
7307                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7308                     resop, TRUE);
7309         rfs4_dbe_unlock(lsp->rls_dbe);
7310 
7311         return (rc);
7312 }
7313 
7314 static void
7315 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7316     struct svc_req *req, struct compound_state *cs)
7317 {
7318         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7319         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7320         open_owner4 *owner = &args->owner;
7321         open_claim_type4 claim = args->claim;
7322         rfs4_client_t *cp;
7323         rfs4_openowner_t *oo;
7324         bool_t create;
7325         bool_t replay = FALSE;
7326         int can_reclaim;
7327 
7328         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7329             OPEN4args *, args);
7330 
7331         if (cs->vp == NULL) {
7332                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7333                 goto end;
7334         }
7335 
7336         /*
7337          * Need to check clientid and lease expiration first based on
7338          * error ordering and incrementing sequence id.
7339          */
7340         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7341         if (cp == NULL) {
7342                 *cs->statusp = resp->status =
7343                     rfs4_check_clientid(&owner->clientid, 0);
7344                 goto end;
7345         }
7346 
7347         if (rfs4_lease_expired(cp)) {
7348                 rfs4_client_close(cp);
7349                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7350                 goto end;
7351         }
7352         can_reclaim = cp->rc_can_reclaim;
7353 
7354         /*
7355          * Find the open_owner for use from this point forward.  Take
7356          * care in updating the sequence id based on the type of error
7357          * being returned.
7358          */
7359 retry:
7360         create = TRUE;
7361         oo = rfs4_findopenowner(owner, &create, args->seqid);
7362         if (oo == NULL) {
7363                 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7364                 rfs4_client_rele(cp);
7365                 goto end;
7366         }
7367 
7368         /* Hold off access to the sequence space while the open is done */
7369         rfs4_sw_enter(&oo->ro_sw);
7370 
7371         /*
7372          * If the open_owner existed before at the server, then check
7373          * the sequence id.
7374          */
7375         if (!create && !oo->ro_postpone_confirm) {
7376                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7377                 case NFS4_CHKSEQ_BAD:
7378                         if ((args->seqid > oo->ro_open_seqid) &&
7379                             oo->ro_need_confirm) {
7380                                 rfs4_free_opens(oo, TRUE, FALSE);
7381                                 rfs4_sw_exit(&oo->ro_sw);
7382                                 rfs4_openowner_rele(oo);
7383                                 goto retry;
7384                         }
7385                         resp->status = NFS4ERR_BAD_SEQID;
7386                         goto out;
7387                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7388                         replay = TRUE;
7389                         goto out;
7390                 default:
7391                         break;
7392                 }
7393 
7394                 /*
7395                  * Sequence was ok and open owner exists
7396                  * check to see if we have yet to see an
7397                  * open_confirm.
7398                  */
7399                 if (oo->ro_need_confirm) {
7400                         rfs4_free_opens(oo, TRUE, FALSE);
7401                         rfs4_sw_exit(&oo->ro_sw);
7402                         rfs4_openowner_rele(oo);
7403                         goto retry;
7404                 }
7405         }
7406         /* Grace only applies to regular-type OPENs */
7407         if (rfs4_clnt_in_grace(cp) &&
7408             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7409                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7410                 goto out;
7411         }
7412 
7413         /*
7414          * If previous state at the server existed then can_reclaim
7415          * will be set. If not reply NFS4ERR_NO_GRACE to the
7416          * client.
7417          */
7418         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7419                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7420                 goto out;
7421         }
7422 
7423 
7424         /*
7425          * Reject the open if the client has missed the grace period
7426          */
7427         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7428                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7429                 goto out;
7430         }
7431 
7432         /* Couple of up-front bookkeeping items */
7433         if (oo->ro_need_confirm) {
7434                 /*
7435                  * If this is a reclaim OPEN then we should not ask
7436                  * for a confirmation of the open_owner per the
7437                  * protocol specification.
7438                  */
7439                 if (claim == CLAIM_PREVIOUS)
7440                         oo->ro_need_confirm = FALSE;
7441                 else
7442                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7443         }
7444         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7445 
7446         /*
7447          * If there is an unshared filesystem mounted on this vnode,
7448          * do not allow to open/create in this directory.
7449          */
7450         if (vn_ismntpt(cs->vp)) {
7451                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7452                 goto out;
7453         }
7454 
7455         /*
         * access must be READ, WRITE, or BOTH.  No access is invalid.
7457          * deny can be READ, WRITE, BOTH, or NONE.
7458          * bits not defined for access/deny are invalid.
7459          */
7460         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7461             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7462             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7463                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7464                 goto out;
7465         }
7466 
7467 
7468         /*
7469          * make sure attrset is zero before response is built.
7470          */
7471         resp->attrset = 0;
7472 
7473         switch (claim) {
7474         case CLAIM_NULL:
7475                 rfs4_do_opennull(cs, req, args, oo, resp);
7476                 break;
7477         case CLAIM_PREVIOUS:
7478                 rfs4_do_openprev(cs, req, args, oo, resp);
7479                 break;
7480         case CLAIM_DELEGATE_CUR:
7481                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7482                 break;
7483         case CLAIM_DELEGATE_PREV:
7484                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7485                 break;
7486         default:
7487                 resp->status = NFS4ERR_INVAL;
7488                 break;
7489         }
7490 
7491 out:
7492         rfs4_client_rele(cp);
7493 
7494         /* Catch sequence id handling here to make it a little easier */
7495         switch (resp->status) {
7496         case NFS4ERR_BADXDR:
7497         case NFS4ERR_BAD_SEQID:
7498         case NFS4ERR_BAD_STATEID:
7499         case NFS4ERR_NOFILEHANDLE:
7500         case NFS4ERR_RESOURCE:
7501         case NFS4ERR_STALE_CLIENTID:
7502         case NFS4ERR_STALE_STATEID:
7503                 /*
7504                  * The protocol states that if any of these errors are
7505                  * being returned, the sequence id should not be
7506                  * incremented.  Any other return requires an
7507                  * increment.
7508                  */
7509                 break;
7510         default:
7511                 /* Always update the lease in this case */
7512                 rfs4_update_lease(oo->ro_client);
7513 
7514                 /* Regular response - copy the result */
7515                 if (!replay)
7516                         rfs4_update_open_resp(oo, resop, &cs->fh);
7517 
7518                 /*
7519                  * REPLAY case: Only if the previous response was OK
7520                  * do we copy the filehandle.  If not OK, no
7521                  * filehandle to copy.
7522                  */
7523                 if (replay == TRUE &&
7524                     resp->status == NFS4_OK &&
7525                     oo->ro_reply_fh.nfs_fh4_val) {
7526                         /*
7527                          * If this is a replay, we must restore the
7528                          * current filehandle/vp to that of what was
7529                          * returned originally.  Try our best to do
7530                          * it.
7531                          */
7532                         nfs_fh4_fmt_t *fh_fmtp =
7533                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7534 
7535                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7536                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7537 
7538                         if (cs->exi == NULL) {
7539                                 resp->status = NFS4ERR_STALE;
7540                                 goto finish;
7541                         }
7542 
7543                         VN_RELE(cs->vp);
7544 
7545                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7546                             &resp->status);
7547 
7548                         if (cs->vp == NULL)
7549                                 goto finish;
7550 
7551                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7552                 }
7553 
7554                 /*
7555                  * If this was a replay, no need to update the
7556                  * sequence id. If the open_owner was not created on
7557                  * this pass, then update.  The first use of an
7558                  * open_owner will not bump the sequence id.
7559                  */
7560                 if (replay == FALSE && !create)
7561                         rfs4_update_open_sequence(oo);
7562                 /*
7563                  * If the client is receiving an error and the
7564                  * open_owner needs to be confirmed, there is no way
7565                  * to notify the client of this fact ignoring the fact
7566                  * that the server has no method of returning a
7567                  * stateid to confirm.  Therefore, the server needs to
7568                  * mark this open_owner in a way as to avoid the
7569                  * sequence id checking the next time the client uses
7570                  * this open_owner.
7571                  */
7572                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7573                         oo->ro_postpone_confirm = TRUE;
7574                 /*
7575                  * If OK response then clear the postpone flag and
7576                  * reset the sequence id to keep in sync with the
7577                  * client.
7578                  */
7579                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7580                         oo->ro_postpone_confirm = FALSE;
7581                         oo->ro_open_seqid = args->seqid;
7582                 }
7583                 break;
7584         }
7585 
7586 finish:
7587         *cs->statusp = resp->status;
7588 
7589         rfs4_sw_exit(&oo->ro_sw);
7590         rfs4_openowner_rele(oo);
7591 
7592 end:
7593         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7594             OPEN4res *, resp);
7595 }
7596 
7597 /*ARGSUSED*/
7598 void
7599 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7600     struct svc_req *req, struct compound_state *cs)
7601 {
7602         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7603         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7604         rfs4_state_t *sp;
7605         nfsstat4 status;
7606 
7607         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7608             OPEN_CONFIRM4args *, args);
7609 
7610         if (cs->vp == NULL) {
7611                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7612                 goto out;
7613         }
7614 
7615         if (cs->vp->v_type != VREG) {
7616                 *cs->statusp = resp->status =
7617                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7618                 return;
7619         }
7620 
7621         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7622         if (status != NFS4_OK) {
7623                 *cs->statusp = resp->status = status;
7624                 goto out;
7625         }
7626 
7627         /* Ensure specified filehandle matches */
7628         if (cs->vp != sp->rs_finfo->rf_vp) {
7629                 rfs4_state_rele(sp);
7630                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7631                 goto out;
7632         }
7633 
7634         /* hold off other access to open_owner while we tinker */
7635         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7636 
7637         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7638         case NFS4_CHECK_STATEID_OKAY:
7639                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7640                     resop) != 0) {
7641                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7642                         break;
7643                 }
7644                 /*
7645                  * If it is the appropriate stateid and determined to
7646                  * be "OKAY" then this means that the stateid does not
7647                  * need to be confirmed and the client is in error for
7648                  * sending an OPEN_CONFIRM.
7649                  */
7650                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7651                 break;
7652         case NFS4_CHECK_STATEID_OLD:
7653                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7654                 break;
7655         case NFS4_CHECK_STATEID_BAD:
7656                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7657                 break;
7658         case NFS4_CHECK_STATEID_EXPIRED:
7659                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7660                 break;
7661         case NFS4_CHECK_STATEID_CLOSED:
7662                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7663                 break;
7664         case NFS4_CHECK_STATEID_REPLAY:
7665                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7666                     resop)) {
7667                 case NFS4_CHKSEQ_OKAY:
7668                         /*
7669                          * This is replayed stateid; if seqid matches
7670                          * next expected, then client is using wrong seqid.
7671                          */
7672                         /* fall through */
7673                 case NFS4_CHKSEQ_BAD:
7674                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7675                         break;
7676                 case NFS4_CHKSEQ_REPLAY:
7677                         /*
7678                          * Note this case is the duplicate case so
7679                          * resp->status is already set.
7680                          */
7681                         *cs->statusp = resp->status;
7682                         rfs4_update_lease(sp->rs_owner->ro_client);
7683                         break;
7684                 }
7685                 break;
7686         case NFS4_CHECK_STATEID_UNCONFIRMED:
7687                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7688                     resop) != NFS4_CHKSEQ_OKAY) {
7689                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7690                         break;
7691                 }
7692                 *cs->statusp = resp->status = NFS4_OK;
7693 
7694                 next_stateid(&sp->rs_stateid);
7695                 resp->open_stateid = sp->rs_stateid.stateid;
7696                 sp->rs_owner->ro_need_confirm = FALSE;
7697                 rfs4_update_lease(sp->rs_owner->ro_client);
7698                 rfs4_update_open_sequence(sp->rs_owner);
7699                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7700                 break;
7701         default:
7702                 ASSERT(FALSE);
7703                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7704                 break;
7705         }
7706         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7707         rfs4_state_rele(sp);
7708 
7709 out:
7710         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7711             OPEN_CONFIRM4res *, resp);
7712 }
7713 
7714 /*ARGSUSED*/
7715 void
7716 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7717     struct svc_req *req, struct compound_state *cs)
7718 {
7719         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7720         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7721         uint32_t access = args->share_access;
7722         uint32_t deny = args->share_deny;
7723         nfsstat4 status;
7724         rfs4_state_t *sp;
7725         rfs4_file_t *fp;
7726         int fflags = 0;
7727 
7728         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7729             OPEN_DOWNGRADE4args *, args);
7730 
7731         if (cs->vp == NULL) {
7732                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7733                 goto out;
7734         }
7735 
7736         if (cs->vp->v_type != VREG) {
7737                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7738                 return;
7739         }
7740 
7741         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7742         if (status != NFS4_OK) {
7743                 *cs->statusp = resp->status = status;
7744                 goto out;
7745         }
7746 
7747         /* Ensure specified filehandle matches */
7748         if (cs->vp != sp->rs_finfo->rf_vp) {
7749                 rfs4_state_rele(sp);
7750                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7751                 goto out;
7752         }
7753 
7754         /* hold off other access to open_owner while we tinker */
7755         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7756 
7757         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7758         case NFS4_CHECK_STATEID_OKAY:
7759                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7760                     resop) != NFS4_CHKSEQ_OKAY) {
7761                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7762                         goto end;
7763                 }
7764                 break;
7765         case NFS4_CHECK_STATEID_OLD:
7766                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7767                 goto end;
7768         case NFS4_CHECK_STATEID_BAD:
7769                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7770                 goto end;
7771         case NFS4_CHECK_STATEID_EXPIRED:
7772                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7773                 goto end;
7774         case NFS4_CHECK_STATEID_CLOSED:
7775                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7776                 goto end;
7777         case NFS4_CHECK_STATEID_UNCONFIRMED:
7778                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7779                 goto end;
7780         case NFS4_CHECK_STATEID_REPLAY:
7781                 /* Check the sequence id for the open owner */
7782                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7783                     resop)) {
7784                 case NFS4_CHKSEQ_OKAY:
7785                         /*
7786                          * This is replayed stateid; if seqid matches
7787                          * next expected, then client is using wrong seqid.
7788                          */
7789                         /* fall through */
7790                 case NFS4_CHKSEQ_BAD:
7791                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7792                         goto end;
7793                 case NFS4_CHKSEQ_REPLAY:
7794                         /*
7795                          * Note this case is the duplicate case so
7796                          * resp->status is already set.
7797                          */
7798                         *cs->statusp = resp->status;
7799                         rfs4_update_lease(sp->rs_owner->ro_client);
7800                         goto end;
7801                 }
7802                 break;
7803         default:
7804                 ASSERT(FALSE);
7805                 break;
7806         }
7807 
7808         rfs4_dbe_lock(sp->rs_dbe);
7809         /*
7810          * Check that the new access modes and deny modes are valid.
7811          * Check that no invalid bits are set.
7812          */
7813         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7814             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7815                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7816                 rfs4_update_open_sequence(sp->rs_owner);
7817                 rfs4_dbe_unlock(sp->rs_dbe);
7818                 goto end;
7819         }
7820 
7821         /*
7822          * The new modes must be a subset of the current modes and
7823          * the access must specify at least one mode. To test that
7824          * the new mode is a subset of the current modes we bitwise
7825          * AND them together and check that the result equals the new
7826          * mode. For example:
7827          * New mode, access == R and current mode, sp->rs_open_access  == RW
7828          * access & sp->rs_open_access == R == access, so the new access mode
7829          * is valid. Consider access == RW, sp->rs_open_access = R
7830          * access & sp->rs_open_access == R != access, so the new access mode
7831          * is invalid.
7832          */
7833         if ((access & sp->rs_open_access) != access ||
7834             (deny & sp->rs_open_deny) != deny ||
7835             (access &
7836             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7837                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7838                 rfs4_update_open_sequence(sp->rs_owner);
7839                 rfs4_dbe_unlock(sp->rs_dbe);
7840                 goto end;
7841         }
7842 
7843         /*
7844          * Release any share locks associated with this stateID.
7845          * Strictly speaking, this violates the spec because the
7846          * spec effectively requires that open downgrade be atomic.
7847          * At present, fs_shrlock does not have this capability.
7848          */
7849         (void) rfs4_unshare(sp);
7850 
7851         status = rfs4_share(sp, access, deny);
7852         if (status != NFS4_OK) {
7853                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7854                 rfs4_update_open_sequence(sp->rs_owner);
7855                 rfs4_dbe_unlock(sp->rs_dbe);
7856                 goto end;
7857         }
7858 
7859         fp = sp->rs_finfo;
7860         rfs4_dbe_lock(fp->rf_dbe);
7861 
7862         /*
7863          * If the current mode has deny read and the new mode
7864          * does not, decrement the number of deny read mode bits
7865          * and if it goes to zero turn off the deny read bit
7866          * on the file.
7867          */
7868         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7869             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7870                 fp->rf_deny_read--;
7871                 if (fp->rf_deny_read == 0)
7872                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7873         }
7874 
7875         /*
7876          * If the current mode has deny write and the new mode
7877          * does not, decrement the number of deny write mode bits
7878          * and if it goes to zero turn off the deny write bit
7879          * on the file.
7880          */
7881         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7882             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7883                 fp->rf_deny_write--;
7884                 if (fp->rf_deny_write == 0)
7885                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7886         }
7887 
7888         /*
7889          * If the current mode has access read and the new mode
7890          * does not, decrement the number of access read mode bits
7891          * and if it goes to zero turn off the access read bit
7892          * on the file.  set fflags to FREAD for the call to
7893          * vn_open_downgrade().
7894          */
7895         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7896             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7897                 fp->rf_access_read--;
7898                 if (fp->rf_access_read == 0)
7899                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7900                 fflags |= FREAD;
7901         }
7902 
7903         /*
7904          * If the current mode has access write and the new mode
7905          * does not, decrement the number of access write mode bits
7906          * and if it goes to zero turn off the access write bit
7907          * on the file.  set fflags to FWRITE for the call to
7908          * vn_open_downgrade().
7909          */
7910         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7911             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7912                 fp->rf_access_write--;
7913                 if (fp->rf_access_write == 0)
7914                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7915                 fflags |= FWRITE;
7916         }
7917 
7918         /* Check that the file is still accessible */
7919         ASSERT(fp->rf_share_access);
7920 
7921         rfs4_dbe_unlock(fp->rf_dbe);
7922 
7923         /* now set the new open access and deny modes */
7924         sp->rs_open_access = access;
7925         sp->rs_open_deny = deny;
7926 
7927         /*
7928          * we successfully downgraded the share lock, now we need to downgrade
7929          * the open. it is possible that the downgrade was only for a deny
7930          * mode and we have nothing else to do.
7931          */
7932         if ((fflags & (FREAD|FWRITE)) != 0)
7933                 vn_open_downgrade(cs->vp, fflags);
7934 
7935         /* Update the stateid */
7936         next_stateid(&sp->rs_stateid);
7937         resp->open_stateid = sp->rs_stateid.stateid;
7938 
7939         rfs4_dbe_unlock(sp->rs_dbe);
7940 
7941         *cs->statusp = resp->status = NFS4_OK;
7942         /* Update the lease */
7943         rfs4_update_lease(sp->rs_owner->ro_client);
7944         /* And the sequence */
7945         rfs4_update_open_sequence(sp->rs_owner);
7946         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7947 
7948 end:
7949         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7950         rfs4_state_rele(sp);
7951 out:
7952         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7953             OPEN_DOWNGRADE4res *, resp);
7954 }
7955 
/*
 * Find the first occurrence of the NUL-terminated string s2 (terminator
 * excluded) within the first n bytes of the buffer s1.  The buffer is
 * treated as raw bytes and need not be NUL-terminated.
 *
 * Returns a pointer to the start of the match inside s1, or NULL when s2
 * is longer than n or does not occur.  An empty s2 matches at s1 itself.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t needle_len = strlen(s2);
        char *base = (char *)s1;
        size_t off;

        /* A needle longer than the buffer can never match. */
        if (n < needle_len)
                return (NULL);

        /* Try every offset at which the whole needle still fits. */
        for (off = 0; off <= n - needle_len; off++) {
                if (bcmp(base + off, s2, needle_len) == 0)
                        return (base + off);
        }

        return (NULL);
}
7971 
7972 /*
7973  * The logic behind this function is detailed in the NFSv4 RFC in the
7974  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7975  * that section for explicit guidance to server behavior for
7976  * SETCLIENTID.
7977  */
7978 void
7979 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7980     struct svc_req *req, struct compound_state *cs)
7981 {
7982         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7983         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7984         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7985         rfs4_clntip_t *ci;
7986         bool_t create;
7987         char *addr, *netid;
7988         int len;
7989 
7990         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7991             SETCLIENTID4args *, args);
7992 retry:
7993         newcp = cp_confirmed = cp_unconfirmed = NULL;
7994 
7995         /*
7996          * Save the caller's IP address
7997          */
7998         args->client.cl_addr =
7999             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8000 
8001         /*
8002          * Record if it is a Solaris client that cannot handle referrals.
8003          */
8004         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8005             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8006                 /* Add a "yes, it's downrev" record */
8007                 create = TRUE;
8008                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8009                 ASSERT(ci != NULL);
8010                 rfs4_dbe_rele(ci->ri_dbe);
8011         } else {
8012                 /* Remove any previous record */
8013                 rfs4_invalidate_clntip(args->client.cl_addr);
8014         }
8015 
8016         /*
8017          * In search of an EXISTING client matching the incoming
8018          * request to establish a new client identifier at the server
8019          */
8020         create = TRUE;
8021         cp = rfs4_findclient(&args->client, &create, NULL);
8022 
8023         /* Should never happen */
8024         ASSERT(cp != NULL);
8025 
8026         if (cp == NULL) {
8027                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8028                 goto out;
8029         }
8030 
8031         /*
8032          * Easiest case. Client identifier is newly created and is
8033          * unconfirmed.  Also note that for this case, no other
8034          * entries exist for the client identifier.  Nothing else to
8035          * check.  Just setup the response and respond.
8036          */
8037         if (create) {
8038                 *cs->statusp = res->status = NFS4_OK;
8039                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8040                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8041                     cp->rc_confirm_verf;
8042                 /* Setup callback information; CB_NULL confirmation later */
8043                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8044 
8045                 rfs4_client_rele(cp);
8046                 goto out;
8047         }
8048 
8049         /*
8050          * An existing, confirmed client may exist but it may not have
8051          * been active for at least one lease period.  If so, then
8052          * "close" the client and create a new client identifier
8053          */
8054         if (rfs4_lease_expired(cp)) {
8055                 rfs4_client_close(cp);
8056                 goto retry;
8057         }
8058 
8059         if (cp->rc_need_confirm == TRUE)
8060                 cp_unconfirmed = cp;
8061         else
8062                 cp_confirmed = cp;
8063 
8064         cp = NULL;
8065 
8066         /*
8067          * We have a confirmed client, now check for an
         * unconfirmed entry
8069          */
8070         if (cp_confirmed) {
8071                 /* If creds don't match then client identifier is inuse */
8072                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8073                         rfs4_cbinfo_t *cbp;
8074                         /*
                         * Someone else has established this client
                         * id.  Try to say who they are.  We will use
                         * the callback address supplied by the
                         * first client.
8079                          */
8080                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8081 
8082                         addr = netid = NULL;
8083 
8084                         cbp = &cp_confirmed->rc_cbinfo;
8085                         if (cbp->cb_callback.cb_location.r_addr &&
8086                             cbp->cb_callback.cb_location.r_netid) {
8087                                 cb_client4 *cbcp = &cbp->cb_callback;
8088 
8089                                 len = strlen(cbcp->cb_location.r_addr)+1;
8090                                 addr = kmem_alloc(len, KM_SLEEP);
8091                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8092                                 len = strlen(cbcp->cb_location.r_netid)+1;
8093                                 netid = kmem_alloc(len, KM_SLEEP);
8094                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8095                         }
8096 
8097                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8098                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8099 
8100                         rfs4_client_rele(cp_confirmed);
8101                 }
8102 
8103                 /*
8104                  * Confirmed, creds match, and verifier matches; must
8105                  * be an update of the callback info
8106                  */
8107                 if (cp_confirmed->rc_nfs_client.verifier ==
8108                     args->client.verifier) {
8109                         /* Setup callback information */
8110                         rfs4_client_setcb(cp_confirmed, &args->callback,
8111                             args->callback_ident);
8112 
8113                         /* everything okay -- move ahead */
8114                         *cs->statusp = res->status = NFS4_OK;
8115                         res->SETCLIENTID4res_u.resok4.clientid =
8116                             cp_confirmed->rc_clientid;
8117 
8118                         /* update the confirm_verifier and return it */
8119                         rfs4_client_scv_next(cp_confirmed);
8120                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8121                             cp_confirmed->rc_confirm_verf;
8122 
8123                         rfs4_client_rele(cp_confirmed);
8124                         goto out;
8125                 }
8126 
8127                 /*
8128                  * Creds match but the verifier doesn't.  Must search
8129                  * for an unconfirmed client that would be replaced by
8130                  * this request.
8131                  */
8132                 create = FALSE;
8133                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8134                     cp_confirmed);
8135         }
8136 
8137         /*
8138          * At this point, we have taken care of the brand new client
8139          * struct, INUSE case, update of an existing, and confirmed
8140          * client struct.
8141          */
8142 
        /*
         * Check whether things have changed since we originally
         * picked up the client struct.  If they have, then return and
         * retry the processing of this SETCLIENTID request.
         */
8148         if (cp_unconfirmed) {
8149                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8150                 if (!cp_unconfirmed->rc_need_confirm) {
8151                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8152                         rfs4_client_rele(cp_unconfirmed);
8153                         if (cp_confirmed)
8154                                 rfs4_client_rele(cp_confirmed);
8155                         goto retry;
8156                 }
8157                 /* do away with the old unconfirmed one */
8158                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8159                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8160                 rfs4_client_rele(cp_unconfirmed);
8161                 cp_unconfirmed = NULL;
8162         }
8163 
8164         /*
8165          * This search will temporarily hide the confirmed client
8166          * struct while a new client struct is created as the
8167          * unconfirmed one.
8168          */
8169         create = TRUE;
8170         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8171 
8172         ASSERT(newcp != NULL);
8173 
8174         if (newcp == NULL) {
8175                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8176                 rfs4_client_rele(cp_confirmed);
8177                 goto out;
8178         }
8179 
8180         /*
8181          * If one was not created, then a similar request must be in
8182          * process so release and start over with this one
8183          */
8184         if (create != TRUE) {
8185                 rfs4_client_rele(newcp);
8186                 if (cp_confirmed)
8187                         rfs4_client_rele(cp_confirmed);
8188                 goto retry;
8189         }
8190 
8191         *cs->statusp = res->status = NFS4_OK;
8192         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8193         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8194             newcp->rc_confirm_verf;
8195         /* Setup callback information; CB_NULL confirmation later */
8196         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8197 
8198         newcp->rc_cp_confirmed = cp_confirmed;
8199 
8200         rfs4_client_rele(newcp);
8201 
8202 out:
8203         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8204             SETCLIENTID4res *, res);
8205 }
8206 
8207 /*ARGSUSED*/
8208 void
8209 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8210     struct svc_req *req, struct compound_state *cs)
8211 {
8212         SETCLIENTID_CONFIRM4args *args =
8213             &argop->nfs_argop4_u.opsetclientid_confirm;
8214         SETCLIENTID_CONFIRM4res *res =
8215             &resop->nfs_resop4_u.opsetclientid_confirm;
8216         rfs4_client_t *cp, *cptoclose = NULL;
8217         nfs4_srv_t *nsrv4;
8218 
8219         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8220             struct compound_state *, cs,
8221             SETCLIENTID_CONFIRM4args *, args);
8222 
8223         nsrv4 = nfs4_get_srv();
8224         *cs->statusp = res->status = NFS4_OK;
8225 
8226         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8227 
8228         if (cp == NULL) {
8229                 *cs->statusp = res->status =
8230                     rfs4_check_clientid(&args->clientid, 1);
8231                 goto out;
8232         }
8233 
8234         if (!creds_ok(cp, req, cs)) {
8235                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8236                 rfs4_client_rele(cp);
8237                 goto out;
8238         }
8239 
8240         /* If the verifier doesn't match, the record doesn't match */
8241         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8242                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8243                 rfs4_client_rele(cp);
8244                 goto out;
8245         }
8246 
8247         rfs4_dbe_lock(cp->rc_dbe);
8248         cp->rc_need_confirm = FALSE;
8249         if (cp->rc_cp_confirmed) {
8250                 cptoclose = cp->rc_cp_confirmed;
8251                 cptoclose->rc_ss_remove = 1;
8252                 cp->rc_cp_confirmed = NULL;
8253         }
8254 
8255         /*
8256          * Update the client's associated server instance, if it's changed
8257          * since the client was created.
8258          */
8259         if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8260                 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8261 
8262         /*
8263          * Record clientid in stable storage.
8264          * Must be done after server instance has been assigned.
8265          */
8266         rfs4_ss_clid(nsrv4, cp);
8267 
8268         rfs4_dbe_unlock(cp->rc_dbe);
8269 
8270         if (cptoclose)
8271                 /* don't need to rele, client_close does it */
8272                 rfs4_client_close(cptoclose);
8273 
8274         /* If needed, initiate CB_NULL call for callback path */
8275         rfs4_deleg_cb_check(cp);
8276         rfs4_update_lease(cp);
8277 
8278         /*
8279          * Check to see if client can perform reclaims
8280          */
8281         rfs4_ss_chkclid(nsrv4, cp);
8282 
8283         rfs4_client_rele(cp);
8284 
8285 out:
8286         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8287             struct compound_state *, cs,
8288             SETCLIENTID_CONFIRM4 *, res);
8289 }
8290 
8291 
8292 /*ARGSUSED*/
8293 void
8294 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8295     struct svc_req *req, struct compound_state *cs)
8296 {
8297         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8298         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8299         rfs4_state_t *sp;
8300         nfsstat4 status;
8301 
8302         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8303             CLOSE4args *, args);
8304 
8305         if (cs->vp == NULL) {
8306                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8307                 goto out;
8308         }
8309 
8310         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8311         if (status != NFS4_OK) {
8312                 *cs->statusp = resp->status = status;
8313                 goto out;
8314         }
8315 
8316         /* Ensure specified filehandle matches */
8317         if (cs->vp != sp->rs_finfo->rf_vp) {
8318                 rfs4_state_rele(sp);
8319                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8320                 goto out;
8321         }
8322 
8323         /* hold off other access to open_owner while we tinker */
8324         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8325 
8326         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8327         case NFS4_CHECK_STATEID_OKAY:
8328                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8329                     resop) != NFS4_CHKSEQ_OKAY) {
8330                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8331                         goto end;
8332                 }
8333                 break;
8334         case NFS4_CHECK_STATEID_OLD:
8335                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8336                 goto end;
8337         case NFS4_CHECK_STATEID_BAD:
8338                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8339                 goto end;
8340         case NFS4_CHECK_STATEID_EXPIRED:
8341                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8342                 goto end;
8343         case NFS4_CHECK_STATEID_CLOSED:
8344                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8345                 goto end;
8346         case NFS4_CHECK_STATEID_UNCONFIRMED:
8347                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8348                 goto end;
8349         case NFS4_CHECK_STATEID_REPLAY:
8350                 /* Check the sequence id for the open owner */
8351                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8352                     resop)) {
8353                 case NFS4_CHKSEQ_OKAY:
8354                         /*
8355                          * This is replayed stateid; if seqid matches
8356                          * next expected, then client is using wrong seqid.
8357                          */
8358                         /* FALL THROUGH */
8359                 case NFS4_CHKSEQ_BAD:
8360                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8361                         goto end;
8362                 case NFS4_CHKSEQ_REPLAY:
8363                         /*
8364                          * Note this case is the duplicate case so
8365                          * resp->status is already set.
8366                          */
8367                         *cs->statusp = resp->status;
8368                         rfs4_update_lease(sp->rs_owner->ro_client);
8369                         goto end;
8370                 }
8371                 break;
8372         default:
8373                 ASSERT(FALSE);
8374                 break;
8375         }
8376 
8377         rfs4_dbe_lock(sp->rs_dbe);
8378 
8379         /* Update the stateid. */
8380         next_stateid(&sp->rs_stateid);
8381         resp->open_stateid = sp->rs_stateid.stateid;
8382 
8383         rfs4_dbe_unlock(sp->rs_dbe);
8384 
8385         rfs4_update_lease(sp->rs_owner->ro_client);
8386         rfs4_update_open_sequence(sp->rs_owner);
8387         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8388 
8389         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8390 
8391         *cs->statusp = resp->status = status;
8392 
8393 end:
8394         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8395         rfs4_state_rele(sp);
8396 out:
8397         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8398             CLOSE4res *, resp);
8399 }
8400 
8401 /*
8402  * Manage the counts on the file struct and close all file locks
8403  */
8404 /*ARGSUSED*/
8405 void
8406 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8407     bool_t close_of_client)
8408 {
8409         rfs4_file_t *fp = sp->rs_finfo;
8410         rfs4_lo_state_t *lsp;
8411         int fflags = 0;
8412 
8413         /*
8414          * If this call is part of the larger closing down of client
8415          * state then it is just easier to release all locks
8416          * associated with this client instead of going through each
8417          * individual file and cleaning locks there.
8418          */
8419         if (close_of_client) {
8420                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8421                     !list_is_empty(&sp->rs_lostatelist) &&
8422                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8423                         /* Is the PxFS kernel module loaded? */
8424                         if (lm_remove_file_locks != NULL) {
8425                                 int new_sysid;
8426 
8427                                 /* Encode the cluster nodeid in new sysid */
8428                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8429                                 lm_set_nlmid_flk(&new_sysid);
8430 
8431                                 /*
8432                                  * This PxFS routine removes file locks for a
8433                                  * client over all nodes of a cluster.
8434                                  */
8435                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8436                                     "lm_remove_file_locks(sysid=0x%x)\n",
8437                                     new_sysid));
8438                                 (*lm_remove_file_locks)(new_sysid);
8439                         } else {
8440                                 struct flock64 flk;
8441 
8442                                 /* Release all locks for this client */
8443                                 flk.l_type = F_UNLKSYS;
8444                                 flk.l_whence = 0;
8445                                 flk.l_start = 0;
8446                                 flk.l_len = 0;
8447                                 flk.l_sysid =
8448                                     sp->rs_owner->ro_client->rc_sysidt;
8449                                 flk.l_pid = 0;
8450                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8451                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8452                                     (u_offset_t)0, NULL, CRED(), NULL);
8453                         }
8454 
8455                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8456                 }
8457         }
8458 
8459         /*
8460          * Release all locks on this file by this lock owner or at
8461          * least mark the locks as having been released
8462          */
8463         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8464             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8465                 lsp->rls_locks_cleaned = TRUE;
8466 
8467                 /* Was this already taken care of above? */
8468                 if (!close_of_client &&
8469                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8470                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8471                             lsp->rls_locker->rl_pid,
8472                             lsp->rls_locker->rl_client->rc_sysidt);
8473         }
8474 
8475         /*
8476          * Release any shrlocks associated with this open state ID.
8477          * This must be done before the rfs4_state gets marked closed.
8478          */
8479         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8480                 (void) rfs4_unshare(sp);
8481 
8482         if (sp->rs_open_access) {
8483                 rfs4_dbe_lock(fp->rf_dbe);
8484 
8485                 /*
8486                  * Decrement the count for each access and deny bit that this
8487                  * state has contributed to the file.
8488                  * If the file counts go to zero
8489                  * clear the appropriate bit in the appropriate mask.
8490                  */
8491                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8492                         fp->rf_access_read--;
8493                         fflags |= FREAD;
8494                         if (fp->rf_access_read == 0)
8495                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8496                 }
8497                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8498                         fp->rf_access_write--;
8499                         fflags |= FWRITE;
8500                         if (fp->rf_access_write == 0)
8501                                 fp->rf_share_access &=
8502                                     ~OPEN4_SHARE_ACCESS_WRITE;
8503                 }
8504                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8505                         fp->rf_deny_read--;
8506                         if (fp->rf_deny_read == 0)
8507                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8508                 }
8509                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8510                         fp->rf_deny_write--;
8511                         if (fp->rf_deny_write == 0)
8512                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8513                 }
8514 
8515                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8516 
8517                 rfs4_dbe_unlock(fp->rf_dbe);
8518 
8519                 sp->rs_open_access = 0;
8520                 sp->rs_open_deny = 0;
8521         }
8522 }
8523 
8524 /*
8525  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8526  */
8527 static nfsstat4
8528 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8529 {
8530         rfs4_lockowner_t *lo;
8531         rfs4_client_t *cp;
8532         uint32_t len;
8533 
8534         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8535         if (lo != NULL) {
8536                 cp = lo->rl_client;
8537                 if (rfs4_lease_expired(cp)) {
8538                         rfs4_lockowner_rele(lo);
8539                         rfs4_dbe_hold(cp->rc_dbe);
8540                         rfs4_client_close(cp);
8541                         return (NFS4ERR_EXPIRED);
8542                 }
8543                 dp->owner.clientid = lo->rl_owner.clientid;
8544                 len = lo->rl_owner.owner_len;
8545                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8546                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8547                 dp->owner.owner_len = len;
8548                 rfs4_lockowner_rele(lo);
8549                 goto finish;
8550         }
8551 
8552         /*
8553          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8554          * of the client id contain the boot time for a NFS4 lock. So we
8555          * fabricate and identity by setting clientid to the sysid, and
8556          * the lock owner to the pid.
8557          */
8558         dp->owner.clientid = flk->l_sysid;
8559         len = sizeof (pid_t);
8560         dp->owner.owner_len = len;
8561         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8562         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8563 finish:
8564         dp->offset = flk->l_start;
8565         dp->length = flk->l_len;
8566 
8567         if (flk->l_type == F_RDLCK)
8568                 dp->locktype = READ_LT;
8569         else if (flk->l_type == F_WRLCK)
8570                 dp->locktype = WRITE_LT;
8571         else
8572                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8573 
8574         return (NFS4_OK);
8575 }
8576 
8577 /*
8578  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8579  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8580  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8581  * for that (obviously); they are sending the LOCK requests with some delays
8582  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8583  * locking and delay implementation at the client side.
8584  *
8585  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8586  * fast retries on its own (the for loop below) in a hope the lock will be
8587  * available soon.  And if not, the client won't need to resend the LOCK
8588  * requests so fast to check the lock availability.  This basically saves some
8589  * network traffic and tries to make sure the client gets the lock ASAP.
8590  */
8591 static int
8592 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8593 {
8594         int error;
8595         struct flock64 flk;
8596         int i;
8597         clock_t delaytime;
8598         int cmd;
8599         int spin_cnt = 0;
8600 
8601         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8602 retry:
8603         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8604 
8605         for (i = 0; i < rfs4_maxlock_tries; i++) {
8606                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8607                 error = VOP_FRLOCK(vp, cmd,
8608                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8609 
8610                 if (error != EAGAIN && error != EACCES)
8611                         break;
8612 
8613                 if (i < rfs4_maxlock_tries - 1) {
8614                         delay(delaytime);
8615                         delaytime *= 2;
8616                 }
8617         }
8618 
8619         if (error == EAGAIN || error == EACCES) {
8620                 /* Get the owner of the lock */
8621                 flk = *flock;
8622                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8623                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8624                     NULL) == 0) {
8625                         /*
8626                          * There's a race inherent in the current VOP_FRLOCK
8627                          * design where:
8628                          * a: "other guy" takes a lock that conflicts with a
8629                          * lock we want
8630                          * b: we attempt to take our lock (non-blocking) and
8631                          * the attempt fails.
8632                          * c: "other guy" releases the conflicting lock
8633                          * d: we ask what lock conflicts with the lock we want,
8634                          * getting F_UNLCK (no lock blocks us)
8635                          *
8636                          * If we retry the non-blocking lock attempt in this
8637                          * case (restart at step 'b') there's some possibility
8638                          * that many such attempts might fail.  However a test
8639                          * designed to actually provoke this race shows that
8640                          * the vast majority of cases require no retry, and
8641                          * only a few took as many as three retries.  Here's
8642                          * the test outcome:
8643                          *
8644                          *         number of retries    how many times we needed
8645                          *                              that many retries
8646                          *         0                    79461
8647                          *         1                      862
8648                          *         2                       49
8649                          *         3                        5
8650                          *
8651                          * Given those empirical results, we arbitrarily limit
8652                          * the retry count to ten.
8653                          *
8654                          * If we actually make to ten retries and give up,
8655                          * nothing catastrophic happens, but we're unable to
8656                          * return the information about the conflicting lock to
8657                          * the NFS client.  That's an acceptable trade off vs.
8658                          * letting this retry loop run forever.
8659                          */
8660                         if (flk.l_type == F_UNLCK) {
8661                                 if (spin_cnt++ < 10) {
8662                                         /* No longer locked, retry */
8663                                         goto retry;
8664                                 }
8665                         } else {
8666                                 *flock = flk;
8667                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8668                                     F_GETLK, &flk);
8669                         }
8670                 }
8671         }
8672 
8673         return (error);
8674 }
8675 
8676 /*ARGSUSED*/
8677 static nfsstat4
8678 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8679     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8680 {
8681         nfsstat4 status;
8682         rfs4_lockowner_t *lo = lsp->rls_locker;
8683         rfs4_state_t *sp = lsp->rls_state;
8684         struct flock64 flock;
8685         int16_t ltype;
8686         int flag;
8687         int error;
8688         sysid_t sysid;
8689         LOCK4res *lres;
8690         vnode_t *vp;
8691 
8692         if (rfs4_lease_expired(lo->rl_client)) {
8693                 return (NFS4ERR_EXPIRED);
8694         }
8695 
8696         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8697                 return (status);
8698 
8699         /* Check for zero length. To lock to end of file use all ones for V4 */
8700         if (length == 0)
8701                 return (NFS4ERR_INVAL);
8702         else if (length == (length4)(~0))
8703                 length = 0;             /* Posix to end of file  */
8704 
8705 retry:
8706         rfs4_dbe_lock(sp->rs_dbe);
8707         if (sp->rs_closed == TRUE) {
8708                 rfs4_dbe_unlock(sp->rs_dbe);
8709                 return (NFS4ERR_OLD_STATEID);
8710         }
8711 
8712         if (resop->resop != OP_LOCKU) {
8713                 switch (locktype) {
8714                 case READ_LT:
8715                 case READW_LT:
8716                         if ((sp->rs_share_access
8717                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8718                                 rfs4_dbe_unlock(sp->rs_dbe);
8719 
8720                                 return (NFS4ERR_OPENMODE);
8721                         }
8722                         ltype = F_RDLCK;
8723                         break;
8724                 case WRITE_LT:
8725                 case WRITEW_LT:
8726                         if ((sp->rs_share_access
8727                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8728                                 rfs4_dbe_unlock(sp->rs_dbe);
8729 
8730                                 return (NFS4ERR_OPENMODE);
8731                         }
8732                         ltype = F_WRLCK;
8733                         break;
8734                 }
8735         } else
8736                 ltype = F_UNLCK;
8737 
8738         flock.l_type = ltype;
8739         flock.l_whence = 0;             /* SEEK_SET */
8740         flock.l_start = offset;
8741         flock.l_len = length;
8742         flock.l_sysid = sysid;
8743         flock.l_pid = lsp->rls_locker->rl_pid;
8744 
8745         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8746         if (flock.l_len < 0 || flock.l_start < 0) {
8747                 rfs4_dbe_unlock(sp->rs_dbe);
8748                 return (NFS4ERR_INVAL);
8749         }
8750 
8751         /*
8752          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8753          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8754          */
8755         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8756 
8757         vp = sp->rs_finfo->rf_vp;
8758         VN_HOLD(vp);
8759 
8760         /*
8761          * We need to unlock sp before we call the underlying filesystem to
8762          * acquire the file lock.
8763          */
8764         rfs4_dbe_unlock(sp->rs_dbe);
8765 
8766         error = setlock(vp, &flock, flag, cred);
8767 
8768         /*
8769          * Make sure the file is still open.  In a case the file was closed in
8770          * the meantime, clean the lock we acquired using the setlock() call
8771          * above, and return the appropriate error.
8772          */
8773         rfs4_dbe_lock(sp->rs_dbe);
8774         if (sp->rs_closed == TRUE) {
8775                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8776                 rfs4_dbe_unlock(sp->rs_dbe);
8777 
8778                 VN_RELE(vp);
8779 
8780                 return (NFS4ERR_OLD_STATEID);
8781         }
8782         rfs4_dbe_unlock(sp->rs_dbe);
8783 
8784         VN_RELE(vp);
8785 
8786         if (error == 0) {
8787                 rfs4_dbe_lock(lsp->rls_dbe);
8788                 next_stateid(&lsp->rls_lockid);
8789                 rfs4_dbe_unlock(lsp->rls_dbe);
8790         }
8791 
8792         /*
8793          * N.B. We map error values to nfsv4 errors. This is differrent
8794          * than puterrno4 routine.
8795          */
8796         switch (error) {
8797         case 0:
8798                 status = NFS4_OK;
8799                 break;
8800         case EAGAIN:
8801         case EACCES:            /* Old value */
8802                 /* Can only get here if op is OP_LOCK */
8803                 ASSERT(resop->resop == OP_LOCK);
8804                 lres = &resop->nfs_resop4_u.oplock;
8805                 status = NFS4ERR_DENIED;
8806                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8807                     == NFS4ERR_EXPIRED)
8808                         goto retry;
8809                 break;
8810         case ENOLCK:
8811                 status = NFS4ERR_DELAY;
8812                 break;
8813         case EOVERFLOW:
8814                 status = NFS4ERR_INVAL;
8815                 break;
8816         case EINVAL:
8817                 status = NFS4ERR_NOTSUPP;
8818                 break;
8819         default:
8820                 status = NFS4ERR_SERVERFAULT;
8821                 break;
8822         }
8823 
8824         return (status);
8825 }
8826 
8827 /*ARGSUSED*/
8828 void
8829 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8830     struct svc_req *req, struct compound_state *cs)
8831 {
8832         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8833         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8834         nfsstat4 status;
8835         stateid4 *stateid;
8836         rfs4_lockowner_t *lo;
8837         rfs4_client_t *cp;
8838         rfs4_state_t *sp = NULL;
8839         rfs4_lo_state_t *lsp = NULL;
8840         bool_t ls_sw_held = FALSE;
8841         bool_t create = TRUE;
8842         bool_t lcreate = TRUE;
8843         bool_t dup_lock = FALSE;
8844         int rc;
8845 
8846         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8847             LOCK4args *, args);
8848 
8849         if (cs->vp == NULL) {
8850                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8851                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8852                     cs, LOCK4res *, resp);
8853                 return;
8854         }
8855 
8856         if (args->locker.new_lock_owner) {
8857                 /* Create a new lockowner for this instance */
8858                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8859 
8860                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8861 
8862                 stateid = &olo->open_stateid;
8863                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8864                 if (status != NFS4_OK) {
8865                         NFS4_DEBUG(rfs4_debug,
8866                             (CE_NOTE, "Get state failed in lock %d", status));
8867                         *cs->statusp = resp->status = status;
8868                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8869                             cs, LOCK4res *, resp);
8870                         return;
8871                 }
8872 
8873                 /* Ensure specified filehandle matches */
8874                 if (cs->vp != sp->rs_finfo->rf_vp) {
8875                         rfs4_state_rele(sp);
8876                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8877                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8878                             cs, LOCK4res *, resp);
8879                         return;
8880                 }
8881 
8882                 /* hold off other access to open_owner while we tinker */
8883                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8884 
8885                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8886                 case NFS4_CHECK_STATEID_OLD:
8887                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8888                         goto end;
8889                 case NFS4_CHECK_STATEID_BAD:
8890                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8891                         goto end;
8892                 case NFS4_CHECK_STATEID_EXPIRED:
8893                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8894                         goto end;
8895                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8896                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8897                         goto end;
8898                 case NFS4_CHECK_STATEID_CLOSED:
8899                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8900                         goto end;
8901                 case NFS4_CHECK_STATEID_OKAY:
8902                 case NFS4_CHECK_STATEID_REPLAY:
8903                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8904                             sp->rs_owner, resop)) {
8905                         case NFS4_CHKSEQ_OKAY:
8906                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8907                                         break;
8908                                 /*
8909                                  * This is replayed stateid; if seqid
8910                                  * matches next expected, then client
8911                                  * is using wrong seqid.
8912                                  */
8913                                 /* FALLTHROUGH */
8914                         case NFS4_CHKSEQ_BAD:
8915                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8916                                 goto end;
8917                         case NFS4_CHKSEQ_REPLAY:
8918                                 /* This is a duplicate LOCK request */
8919                                 dup_lock = TRUE;
8920 
8921                                 /*
8922                                  * For a duplicate we do not want to
8923                                  * create a new lockowner as it should
8924                                  * already exist.
8925                                  * Turn off the lockowner create flag.
8926                                  */
8927                                 lcreate = FALSE;
8928                         }
8929                         break;
8930                 }
8931 
8932                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8933                 if (lo == NULL) {
8934                         NFS4_DEBUG(rfs4_debug,
8935                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8936                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8937                         goto end;
8938                 }
8939 
8940                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8941                 if (lsp == NULL) {
8942                         rfs4_update_lease(sp->rs_owner->ro_client);
8943                         /*
                         * Only update the open_seqid if this is not
8945                          * a duplicate request
8946                          */
8947                         if (dup_lock == FALSE) {
8948                                 rfs4_update_open_sequence(sp->rs_owner);
8949                         }
8950 
8951                         NFS4_DEBUG(rfs4_debug,
8952                             (CE_NOTE, "rfs4_op_lock: no state"));
8953                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8954                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8955                         rfs4_lockowner_rele(lo);
8956                         goto end;
8957                 }
8958 
8959                 /*
8960                  * This is the new_lock_owner branch and the client is
8961                  * supposed to be associating a new lock_owner with
8962                  * the open file at this point.  If we find that a
8963                  * lock_owner/state association already exists and a
8964                  * successful LOCK request was returned to the client,
8965                  * an error is returned to the client since this is
8966                  * not appropriate.  The client should be using the
8967                  * existing lock_owner branch.
8968                  */
8969                 if (dup_lock == FALSE && create == FALSE) {
8970                         if (lsp->rls_lock_completed == TRUE) {
8971                                 *cs->statusp =
8972                                     resp->status = NFS4ERR_BAD_SEQID;
8973                                 rfs4_lockowner_rele(lo);
8974                                 goto end;
8975                         }
8976                 }
8977 
8978                 rfs4_update_lease(sp->rs_owner->ro_client);
8979 
8980                 /*
                 * Only update the open_seqid if this is not
8982                  * a duplicate request
8983                  */
8984                 if (dup_lock == FALSE) {
8985                         rfs4_update_open_sequence(sp->rs_owner);
8986                 }
8987 
8988                 /*
8989                  * If this is a duplicate lock request, just copy the
8990                  * previously saved reply and return.
8991                  */
8992                 if (dup_lock == TRUE) {
8993                         /* verify that lock_seqid's match */
8994                         if (lsp->rls_seqid != olo->lock_seqid) {
8995                                 NFS4_DEBUG(rfs4_debug,
8996                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8997                                     "lsp->seqid=%d old->seqid=%d",
8998                                     lsp->rls_seqid, olo->lock_seqid));
8999                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9000                         } else {
9001                                 rfs4_copy_reply(resop, &lsp->rls_reply);
9002                                 /*
9003                                  * Make sure to copy the just
9004                                  * retrieved reply status into the
9005                                  * overall compound status
9006                                  */
9007                                 *cs->statusp = resp->status;
9008                         }
9009                         rfs4_lockowner_rele(lo);
9010                         goto end;
9011                 }
9012 
9013                 rfs4_dbe_lock(lsp->rls_dbe);
9014 
9015                 /* Make sure to update the lock sequence id */
9016                 lsp->rls_seqid = olo->lock_seqid;
9017 
9018                 NFS4_DEBUG(rfs4_debug,
9019                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9020 
9021                 /*
9022                  * This is used to signify the newly created lockowner
9023                  * stateid and its sequence number.  The checks for
9024                  * sequence number and increment don't occur on the
9025                  * very first lock request for a lockowner.
9026                  */
9027                 lsp->rls_skip_seqid_check = TRUE;
9028 
9029                 /* hold off other access to lsp while we tinker */
9030                 rfs4_sw_enter(&lsp->rls_sw);
9031                 ls_sw_held = TRUE;
9032 
9033                 rfs4_dbe_unlock(lsp->rls_dbe);
9034 
9035                 rfs4_lockowner_rele(lo);
9036         } else {
9037                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9038                 /* get lsp and hold the lock on the underlying file struct */
9039                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9040                     != NFS4_OK) {
9041                         *cs->statusp = resp->status = status;
9042                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9043                             cs, LOCK4res *, resp);
9044                         return;
9045                 }
9046                 create = FALSE; /* We didn't create lsp */
9047 
9048                 /* Ensure specified filehandle matches */
9049                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9050                         rfs4_lo_state_rele(lsp, TRUE);
9051                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9052                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9053                             cs, LOCK4res *, resp);
9054                         return;
9055                 }
9056 
9057                 /* hold off other access to lsp while we tinker */
9058                 rfs4_sw_enter(&lsp->rls_sw);
9059                 ls_sw_held = TRUE;
9060 
9061                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9062                 /*
9063                  * The stateid looks like it was okay (expected to be
9064                  * the next one)
9065                  */
9066                 case NFS4_CHECK_STATEID_OKAY:
9067                         /*
9068                          * The sequence id is now checked.  Determine
9069                          * if this is a replay or if it is in the
9070                          * expected (next) sequence.  In the case of a
9071                          * replay, there are two replay conditions
9072                          * that may occur.  The first is the normal
9073                          * condition where a LOCK is done with a
9074                          * NFS4_OK response and the stateid is
9075                          * updated.  That case is handled below when
9076                          * the stateid is identified as a REPLAY.  The
9077                          * second is the case where an error is
9078                          * returned, like NFS4ERR_DENIED, and the
9079                          * sequence number is updated but the stateid
9080                          * is not updated.  This second case is dealt
9081                          * with here.  So it may seem odd that the
9082                          * stateid is okay but the sequence id is a
9083                          * replay but it is okay.
9084                          */
9085                         switch (rfs4_check_lock_seqid(
9086                             args->locker.locker4_u.lock_owner.lock_seqid,
9087                             lsp, resop)) {
9088                         case NFS4_CHKSEQ_REPLAY:
9089                                 if (resp->status != NFS4_OK) {
9090                                         /*
9091                                          * Here is our replay and need
9092                                          * to verify that the last
9093                                          * response was an error.
9094                                          */
9095                                         *cs->statusp = resp->status;
9096                                         goto end;
9097                                 }
9098                                 /*
9099                                  * This is done since the sequence id
9100                                  * looked like a replay but it didn't
9101                                  * pass our check so a BAD_SEQID is
9102                                  * returned as a result.
9103                                  */
9104                                 /*FALLTHROUGH*/
9105                         case NFS4_CHKSEQ_BAD:
9106                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9107                                 goto end;
9108                         case NFS4_CHKSEQ_OKAY:
9109                                 /* Everything looks okay move ahead */
9110                                 break;
9111                         }
9112                         break;
9113                 case NFS4_CHECK_STATEID_OLD:
9114                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9115                         goto end;
9116                 case NFS4_CHECK_STATEID_BAD:
9117                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9118                         goto end;
9119                 case NFS4_CHECK_STATEID_EXPIRED:
9120                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9121                         goto end;
9122                 case NFS4_CHECK_STATEID_CLOSED:
9123                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9124                         goto end;
9125                 case NFS4_CHECK_STATEID_REPLAY:
9126                         switch (rfs4_check_lock_seqid(
9127                             args->locker.locker4_u.lock_owner.lock_seqid,
9128                             lsp, resop)) {
9129                         case NFS4_CHKSEQ_OKAY:
9130                                 /*
9131                                  * This is a replayed stateid; if
9132                                  * seqid matches the next expected,
9133                                  * then client is using wrong seqid.
9134                                  */
9135                         case NFS4_CHKSEQ_BAD:
9136                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9137                                 goto end;
9138                         case NFS4_CHKSEQ_REPLAY:
9139                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9140                                 *cs->statusp = status = resp->status;
9141                                 goto end;
9142                         }
9143                         break;
9144                 default:
9145                         ASSERT(FALSE);
9146                         break;
9147                 }
9148 
9149                 rfs4_update_lock_sequence(lsp);
9150                 rfs4_update_lease(lsp->rls_locker->rl_client);
9151         }
9152 
9153         /*
9154          * NFS4 only allows locking on regular files, so
9155          * verify type of object.
9156          */
9157         if (cs->vp->v_type != VREG) {
9158                 if (cs->vp->v_type == VDIR)
9159                         status = NFS4ERR_ISDIR;
9160                 else
9161                         status = NFS4ERR_INVAL;
9162                 goto out;
9163         }
9164 
9165         cp = lsp->rls_state->rs_owner->ro_client;
9166 
9167         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9168                 status = NFS4ERR_GRACE;
9169                 goto out;
9170         }
9171 
9172         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9173                 status = NFS4ERR_NO_GRACE;
9174                 goto out;
9175         }
9176 
9177         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9178                 status = NFS4ERR_NO_GRACE;
9179                 goto out;
9180         }
9181 
9182         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9183                 cs->deleg = TRUE;
9184 
9185         status = rfs4_do_lock(lsp, args->locktype,
9186             args->offset, args->length, cs->cr, resop);
9187 
9188 out:
9189         lsp->rls_skip_seqid_check = FALSE;
9190 
9191         *cs->statusp = resp->status = status;
9192 
9193         if (status == NFS4_OK) {
9194                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9195                 lsp->rls_lock_completed = TRUE;
9196         }
9197         /*
9198          * Only update the "OPEN" response here if this was a new
9199          * lock_owner
9200          */
9201         if (sp)
9202                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9203 
9204         rfs4_update_lock_resp(lsp, resop);
9205 
9206 end:
9207         if (lsp) {
9208                 if (ls_sw_held)
9209                         rfs4_sw_exit(&lsp->rls_sw);
9210                 /*
9211                  * If an sp obtained, then the lsp does not represent
9212                  * a lock on the file struct.
9213                  */
9214                 if (sp != NULL)
9215                         rfs4_lo_state_rele(lsp, FALSE);
9216                 else
9217                         rfs4_lo_state_rele(lsp, TRUE);
9218         }
9219         if (sp) {
9220                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9221                 rfs4_state_rele(sp);
9222         }
9223 
9224         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9225             LOCK4res *, resp);
9226 }
9227 
9228 /* free function for LOCK/LOCKT */
9229 static void
9230 lock_denied_free(nfs_resop4 *resop)
9231 {
9232         LOCK4denied *dp = NULL;
9233 
9234         switch (resop->resop) {
9235         case OP_LOCK:
9236                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9237                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9238                 break;
9239         case OP_LOCKT:
9240                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9241                         dp = &resop->nfs_resop4_u.oplockt.denied;
9242                 break;
9243         default:
9244                 break;
9245         }
9246 
9247         if (dp)
9248                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9249 }
9250 
/*
 * NFSv4 LOCKU operation handler.
 *
 * Looks up the lock state named by args->lock_stateid, checks that it
 * refers to the current filehandle (cs->vp), validates the stateid and
 * seqid, and then hands the request (locktype, offset, length) to
 * rfs4_do_lock().
 *
 * The final status is stored in both resp->status and *cs->statusp.
 * When the status is NFS4_OK, resp->lock_stateid is set from
 * lsp->rls_lockid.stateid.
 *
 * Exit paths:
 *   - early returns: taken before rls_sw is entered; any lsp reference
 *     already obtained is dropped first.
 *   - "end": stateid/seqid rejections and replays; leaves rls_sw and
 *     drops the lsp reference without calling rfs4_update_lock_resp().
 *   - "out": the request passed the stateid/seqid checks; records the
 *     status, calls rfs4_update_lock_resp(), then falls into "end".
 */
/*ARGSUSED*/
void
rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
        LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
        LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
        nfsstat4 status;
        stateid4 *stateid = &args->lock_stateid;
        rfs4_lo_state_t *lsp;

        DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
            LOCKU4args *, args);

        /* LOCKU needs a current filehandle */
        if (cs->vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
                    LOCKU4res *, resp);
                return;
        }

        /*
         * Get the lock state for the supplied stateid.  On success the
         * lsp must be released with rfs4_lo_state_rele(lsp, TRUE), as is
         * done on every path below.
         */
        if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
                *cs->statusp = resp->status = status;
                DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
                    LOCKU4res *, resp);
                return;
        }

        /* Ensure specified filehandle matches */
        if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
                rfs4_lo_state_rele(lsp, TRUE);
                *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
                    LOCKU4res *, resp);
                return;
        }

        /* hold off other access to lsp while we tinker */
        rfs4_sw_enter(&lsp->rls_sw);

        switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
        case NFS4_CHECK_STATEID_OKAY:
                /*
                 * With a current stateid, only a seqid check result of
                 * NFS4_CHKSEQ_OKAY lets the request proceed; a replayed
                 * or bad seqid is rejected with NFS4ERR_BAD_SEQID.
                 */
                if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
                    != NFS4_CHKSEQ_OKAY) {
                        *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
                        goto end;
                }
                break;
        case NFS4_CHECK_STATEID_OLD:
                *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
                goto end;
        case NFS4_CHECK_STATEID_BAD:
                *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                goto end;
        case NFS4_CHECK_STATEID_EXPIRED:
                *cs->statusp = resp->status = NFS4ERR_EXPIRED;
                goto end;
        case NFS4_CHECK_STATEID_CLOSED:
                *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
                goto end;
        case NFS4_CHECK_STATEID_REPLAY:
                switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
                case NFS4_CHKSEQ_OKAY:
                        /*
                         * This is a replayed stateid; if
                         * seqid matches the next expected,
                         * then client is using wrong seqid.
                         */
                        /* FALLTHROUGH */
                case NFS4_CHKSEQ_BAD:
                        *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
                        goto end;
                case NFS4_CHKSEQ_REPLAY:
                        /*
                         * Replayed stateid and replayed seqid: renew the
                         * lease and return resp->status unchanged.
                         * NOTE(review): this relies on
                         * rfs4_check_lock_seqid() having placed the
                         * previously saved reply in resop -- confirm.
                         */
                        rfs4_update_lease(lsp->rls_locker->rl_client);
                        *cs->statusp = status = resp->status;
                        goto end;
                }
                break;
        default:
                ASSERT(FALSE);
                break;
        }

        /*
         * The stateid and seqid were accepted: update the lock sequence
         * and the client's lease before acting on the request.
         */
        rfs4_update_lock_sequence(lsp);
        rfs4_update_lease(lsp->rls_locker->rl_client);

        /*
         * NFS4 only allows locking on regular files, so
         * verify type of object.
         */
        if (cs->vp->v_type != VREG) {
                if (cs->vp->v_type == VDIR)
                        status = NFS4ERR_ISDIR;
                else
                        status = NFS4ERR_INVAL;
                goto out;
        }

        /* Requests from a client that is still in grace get NFS4ERR_GRACE */
        if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
                status = NFS4ERR_GRACE;
                goto out;
        }

        status = rfs4_do_lock(lsp, args->locktype,
            args->offset, args->length, cs->cr, resop);

out:
        *cs->statusp = resp->status = status;

        if (status == NFS4_OK)
                resp->lock_stateid = lsp->rls_lockid.stateid;

        /*
         * Hand the result to rfs4_update_lock_resp(); this is done for
         * failures reported through "out" as well as for success.
         */
        rfs4_update_lock_resp(lsp, resop);

end:
        /* Undo rfs4_sw_enter() and drop the rfs4_get_lo_state() reference */
        rfs4_sw_exit(&lsp->rls_sw);
        rfs4_lo_state_rele(lsp, TRUE);

        DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
            LOCKU4res *, resp);
}
9371 
9372 /*
9373  * LOCKT is a best effort routine, the client can not be guaranteed that
9374  * the status return is still in effect by the time the reply is received.
9375  * They are numerous race conditions in this routine, but we are not required
9376  * and can not be accurate.
9377  */
9378 /*ARGSUSED*/
9379 void
9380 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9381     struct svc_req *req, struct compound_state *cs)
9382 {
9383         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9384         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9385         rfs4_lockowner_t *lo;
9386         rfs4_client_t *cp;
9387         bool_t create = FALSE;
9388         struct flock64 flk;
9389         int error;
9390         int flag = FREAD | FWRITE;
9391         int ltype;
9392         length4 posix_length;
9393         sysid_t sysid;
9394         pid_t pid;
9395 
9396         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9397             LOCKT4args *, args);
9398 
9399         if (cs->vp == NULL) {
9400                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9401                 goto out;
9402         }
9403 
9404         /*
9405          * NFS4 only allows locking on regular files, so
9406          * verify type of object.
9407          */
9408         if (cs->vp->v_type != VREG) {
9409                 if (cs->vp->v_type == VDIR)
9410                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9411                 else
9412                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9413                 goto out;
9414         }
9415 
9416         /*
9417          * Check out the clientid to ensure the server knows about it
9418          * so that we correctly inform the client of a server reboot.
9419          */
9420         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9421             == NULL) {
9422                 *cs->statusp = resp->status =
9423                     rfs4_check_clientid(&args->owner.clientid, 0);
9424                 goto out;
9425         }
9426         if (rfs4_lease_expired(cp)) {
9427                 rfs4_client_close(cp);
9428                 /*
9429                  * Protocol doesn't allow returning NFS4ERR_STALE as
9430                  * other operations do on this check so STALE_CLIENTID
9431                  * is returned instead
9432                  */
9433                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9434                 goto out;
9435         }
9436 
9437         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9438                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9439                 rfs4_client_rele(cp);
9440                 goto out;
9441         }
9442         rfs4_client_rele(cp);
9443 
9444         resp->status = NFS4_OK;
9445 
9446         switch (args->locktype) {
9447         case READ_LT:
9448         case READW_LT:
9449                 ltype = F_RDLCK;
9450                 break;
9451         case WRITE_LT:
9452         case WRITEW_LT:
9453                 ltype = F_WRLCK;
9454                 break;
9455         }
9456 
9457         posix_length = args->length;
9458         /* Check for zero length. To lock to end of file use all ones for V4 */
9459         if (posix_length == 0) {
9460                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9461                 goto out;
9462         } else if (posix_length == (length4)(~0)) {
9463                 posix_length = 0;       /* Posix to end of file  */
9464         }
9465 
9466         /* Find or create a lockowner */
9467         lo = rfs4_findlockowner(&args->owner, &create);
9468 
9469         if (lo) {
9470                 pid = lo->rl_pid;
9471                 if ((resp->status =
9472                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9473                         goto err;
9474         } else {
9475                 pid = 0;
9476                 sysid = lockt_sysid;
9477         }
9478 retry:
9479         flk.l_type = ltype;
9480         flk.l_whence = 0;               /* SEEK_SET */
9481         flk.l_start = args->offset;
9482         flk.l_len = posix_length;
9483         flk.l_sysid = sysid;
9484         flk.l_pid = pid;
9485         flag |= F_REMOTELOCK;
9486 
9487         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9488 
9489         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9490         if (flk.l_len < 0 || flk.l_start < 0) {
9491                 resp->status = NFS4ERR_INVAL;
9492                 goto err;
9493         }
9494         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9495             NULL, cs->cr, NULL);
9496 
9497         /*
9498          * N.B. We map error values to nfsv4 errors. This is differrent
9499          * than puterrno4 routine.
9500          */
9501         switch (error) {
9502         case 0:
9503                 if (flk.l_type == F_UNLCK)
9504                         resp->status = NFS4_OK;
9505                 else {
9506                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9507                                 goto retry;
9508                         resp->status = NFS4ERR_DENIED;
9509                 }
9510                 break;
9511         case EOVERFLOW:
9512                 resp->status = NFS4ERR_INVAL;
9513                 break;
9514         case EINVAL:
9515                 resp->status = NFS4ERR_NOTSUPP;
9516                 break;
9517         default:
9518                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9519                     error);
9520                 resp->status = NFS4ERR_SERVERFAULT;
9521                 break;
9522         }
9523 
9524 err:
9525         if (lo)
9526                 rfs4_lockowner_rele(lo);
9527         *cs->statusp = resp->status;
9528 out:
9529         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9530             LOCKT4res *, resp);
9531 }
9532 
9533 int
9534 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9535 {
9536         int err;
9537         int cmd;
9538         vnode_t *vp;
9539         struct shrlock shr;
9540         struct shr_locowner shr_loco;
9541         int fflags = 0;
9542 
9543         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9544         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9545 
9546         if (sp->rs_closed)
9547                 return (NFS4ERR_OLD_STATEID);
9548 
9549         vp = sp->rs_finfo->rf_vp;
9550         ASSERT(vp);
9551 
9552         shr.s_access = shr.s_deny = 0;
9553 
9554         if (access & OPEN4_SHARE_ACCESS_READ) {
9555                 fflags |= FREAD;
9556                 shr.s_access |= F_RDACC;
9557         }
9558         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9559                 fflags |= FWRITE;
9560                 shr.s_access |= F_WRACC;
9561         }
9562         ASSERT(shr.s_access);
9563 
9564         if (deny & OPEN4_SHARE_DENY_READ)
9565                 shr.s_deny |= F_RDDNY;
9566         if (deny & OPEN4_SHARE_DENY_WRITE)
9567                 shr.s_deny |= F_WRDNY;
9568 
9569         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9570         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9571         shr_loco.sl_pid = shr.s_pid;
9572         shr_loco.sl_id = shr.s_sysid;
9573         shr.s_owner = (caddr_t)&shr_loco;
9574         shr.s_own_len = sizeof (shr_loco);
9575 
9576         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9577 
9578         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9579         if (err != 0) {
9580                 if (err == EAGAIN)
9581                         err = NFS4ERR_SHARE_DENIED;
9582                 else
9583                         err = puterrno4(err);
9584                 return (err);
9585         }
9586 
9587         sp->rs_share_access |= access;
9588         sp->rs_share_deny |= deny;
9589 
9590         return (0);
9591 }
9592 
9593 int
9594 rfs4_unshare(rfs4_state_t *sp)
9595 {
9596         int err;
9597         struct shrlock shr;
9598         struct shr_locowner shr_loco;
9599 
9600         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9601 
9602         if (sp->rs_closed || sp->rs_share_access == 0)
9603                 return (0);
9604 
9605         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9606         ASSERT(sp->rs_finfo->rf_vp);
9607 
9608         shr.s_access = shr.s_deny = 0;
9609         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9610         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9611         shr_loco.sl_pid = shr.s_pid;
9612         shr_loco.sl_id = shr.s_sysid;
9613         shr.s_owner = (caddr_t)&shr_loco;
9614         shr.s_own_len = sizeof (shr_loco);
9615 
9616         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9617             NULL);
9618         if (err != 0) {
9619                 err = puterrno4(err);
9620                 return (err);
9621         }
9622 
9623         sp->rs_share_access = 0;
9624         sp->rs_share_deny = 0;
9625 
9626         return (0);
9627 
9628 }
9629 
9630 static int
9631 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9632 {
9633         struct clist    *wcl;
9634         count4          count = rok->data_len;
9635         int             wlist_len;
9636 
9637         wcl = args->wlist;
9638         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9639                 return (FALSE);
9640         }
9641         wcl = args->wlist;
9642         rok->wlist_len = wlist_len;
9643         rok->wlist = wcl;
9644         return (TRUE);
9645 }
9646 
/*
 * Tunable to disable server referrals.  When set to a non-zero value,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9649 
9650 /*
9651  * Find an NFS record in reparse point data.
9652  * Returns 0 for success and <0 or an errno value on failure.
9653  */
9654 int
9655 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9656 {
9657         int err;
9658         char *stype, *val;
9659         nvlist_t *nvl;
9660         nvpair_t *curr;
9661 
9662         if ((nvl = reparse_init()) == NULL)
9663                 return (-1);
9664 
9665         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9666                 reparse_free(nvl);
9667                 return (err);
9668         }
9669 
9670         curr = NULL;
9671         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9672                 if ((stype = nvpair_name(curr)) == NULL) {
9673                         reparse_free(nvl);
9674                         return (-2);
9675                 }
9676                 if (strncasecmp(stype, "NFS", 3) == 0)
9677                         break;
9678         }
9679 
9680         if ((curr == NULL) ||
9681             (nvpair_value_string(curr, &val))) {
9682                 reparse_free(nvl);
9683                 return (-3);
9684         }
9685         *nvlp = nvl;
9686         *svcp = stype;
9687         *datap = val;
9688         return (0);
9689 }
9690 
9691 int
9692 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9693 {
9694         nvlist_t *nvl;
9695         char *s, *d;
9696 
9697         if (rfs4_no_referrals != 0)
9698                 return (B_FALSE);
9699 
9700         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9701                 return (B_FALSE);
9702 
9703         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9704                 return (B_FALSE);
9705 
9706         reparse_free(nvl);
9707 
9708         return (B_TRUE);
9709 }
9710 
9711 /*
9712  * There is a user-level copy of this routine in ref_subr.c.
9713  * Changes should be kept in sync.
9714  */
9715 static int
9716 nfs4_create_components(char *path, component4 *comp4)
9717 {
9718         int slen, plen, ncomp;
9719         char *ori_path, *nxtc, buf[MAXNAMELEN];
9720 
9721         if (path == NULL)
9722                 return (0);
9723 
9724         plen = strlen(path) + 1;        /* include the terminator */
9725         ori_path = path;
9726         ncomp = 0;
9727 
9728         /* count number of components in the path */
9729         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9730                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9731                         if ((slen = nxtc - path) == 0) {
9732                                 path = nxtc + 1;
9733                                 continue;
9734                         }
9735 
9736                         if (comp4 != NULL) {
9737                                 bcopy(path, buf, slen);
9738                                 buf[slen] = '\0';
9739                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9740                         }
9741 
9742                         ncomp++;        /* 1 valid component */
9743                         path = nxtc + 1;
9744                 }
9745                 if (*nxtc == '\0' || *nxtc == '\n')
9746                         break;
9747         }
9748 
9749         return (ncomp);
9750 }
9751 
9752 /*
9753  * There is a user-level copy of this routine in ref_subr.c.
9754  * Changes should be kept in sync.
9755  */
9756 static int
9757 make_pathname4(char *path, pathname4 *pathname)
9758 {
9759         int ncomp;
9760         component4 *comp4;
9761 
9762         if (pathname == NULL)
9763                 return (0);
9764 
9765         if (path == NULL) {
9766                 pathname->pathname4_val = NULL;
9767                 pathname->pathname4_len = 0;
9768                 return (0);
9769         }
9770 
9771         /* count number of components to alloc buffer */
9772         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9773                 pathname->pathname4_val = NULL;
9774                 pathname->pathname4_len = 0;
9775                 return (0);
9776         }
9777         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9778 
9779         /* copy components into allocated buffer */
9780         ncomp = nfs4_create_components(path, comp4);
9781 
9782         pathname->pathname4_val = comp4;
9783         pathname->pathname4_len = ncomp;
9784 
9785         return (ncomp);
9786 }
9787 
9788 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9789 
9790 fs_locations4 *
9791 fetch_referral(vnode_t *vp, cred_t *cr)
9792 {
9793         nvlist_t *nvl;
9794         char *stype, *sdata;
9795         fs_locations4 *result;
9796         char buf[1024];
9797         size_t bufsize;
9798         XDR xdr;
9799         int err;
9800 
9801         /*
9802          * Check attrs to ensure it's a reparse point
9803          */
9804         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9805                 return (NULL);
9806 
9807         /*
9808          * Look for an NFS record and get the type and data
9809          */
9810         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9811                 return (NULL);
9812 
9813         /*
9814          * With the type and data, upcall to get the referral
9815          */
9816         bufsize = sizeof (buf);
9817         bzero(buf, sizeof (buf));
9818         err = reparse_kderef((const char *)stype, (const char *)sdata,
9819             buf, &bufsize);
9820         reparse_free(nvl);
9821 
9822         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9823             char *, stype, char *, sdata, char *, buf, int, err);
9824         if (err) {
9825                 cmn_err(CE_NOTE,
9826                     "reparsed daemon not running: unable to get referral (%d)",
9827                     err);
9828                 return (NULL);
9829         }
9830 
9831         /*
9832          * We get an XDR'ed record back from the kderef call
9833          */
9834         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9835         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9836         err = xdr_fs_locations4(&xdr, result);
9837         XDR_DESTROY(&xdr);
9838         if (err != TRUE) {
9839                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9840                     int, err);
9841                 return (NULL);
9842         }
9843 
9844         /*
9845          * Look at path to recover fs_root, ignoring the leading '/'
9846          */
9847         (void) make_pathname4(vp->v_path, &result->fs_root);
9848 
9849         return (result);
9850 }
9851 
9852 char *
9853 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9854 {
9855         fs_locations4 *fsl;
9856         fs_location4 *fs;
9857         char *server, *path, *symbuf;
9858         static char *prefix = "/net/";
9859         int i, size, npaths;
9860         uint_t len;
9861 
9862         /* Get the referral */
9863         if ((fsl = fetch_referral(vp, cr)) == NULL)
9864                 return (NULL);
9865 
9866         /* Deal with only the first location and first server */
9867         fs = &fsl->locations_val[0];
9868         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9869         if (server == NULL) {
9870                 rfs4_free_fs_locations4(fsl);
9871                 kmem_free(fsl, sizeof (fs_locations4));
9872                 return (NULL);
9873         }
9874 
9875         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9876         size = strlen(prefix) + len;
9877         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9878                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9879 
9880         /* Allocate the symlink buffer and fill it */
9881         symbuf = kmem_zalloc(size, KM_SLEEP);
9882         (void) strcat(symbuf, prefix);
9883         (void) strcat(symbuf, server);
9884         kmem_free(server, len);
9885 
9886         npaths = 0;
9887         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9888                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9889                 if (path == NULL)
9890                         continue;
9891                 (void) strcat(symbuf, "/");
9892                 (void) strcat(symbuf, path);
9893                 npaths++;
9894                 kmem_free(path, len);
9895         }
9896 
9897         rfs4_free_fs_locations4(fsl);
9898         kmem_free(fsl, sizeof (fs_locations4));
9899 
9900         if (strsz != NULL)
9901                 *strsz = size;
9902         return (symbuf);
9903 }
9904 
9905 /*
9906  * Check to see if we have a downrev Solaris client, so that we
9907  * can send it a symlink instead of a referral.
9908  */
9909 int
9910 client_is_downrev(struct svc_req *req)
9911 {
9912         struct sockaddr *ca;
9913         rfs4_clntip_t *ci;
9914         bool_t create = FALSE;
9915         int is_downrev;
9916 
9917         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9918         ASSERT(ca);
9919         ci = rfs4_find_clntip(ca, &create);
9920         if (ci == NULL)
9921                 return (0);
9922         is_downrev = ci->ri_no_referrals;
9923         rfs4_dbe_rele(ci->ri_dbe);
9924         return (is_downrev);
9925 }
9926 
9927 /*
9928  * Do the main work of handling HA-NFSv4 Resource Group failover on
9929  * Sun Cluster.
9930  * We need to detect whether any RG admin paths have been added or removed,
9931  * and adjust resources accordingly.
9932  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9933  * order to scale, the list and array of paths need to be held in more
9934  * suitable data structures.
9935  */
9936 static void
9937 hanfsv4_failover(nfs4_srv_t *nsrv4)
9938 {
9939         int i, start_grace, numadded_paths = 0;
9940         char **added_paths = NULL;
9941         rfs4_dss_path_t *dss_path;
9942 
9943         /*
9944          * Note: currently, dss_pathlist cannot be NULL, since
9945          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9946          * make the latter dynamically specified too, the following will
9947          * need to be adjusted.
9948          */
9949 
9950         /*
9951          * First, look for removed paths: RGs that have been failed-over
9952          * away from this node.
9953          * Walk the "currently-serving" dss_pathlist and, for each
9954          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9955          * from nfsd. If not, that RG path has been removed.
9956          *
9957          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9958          * any duplicates.
9959          */
9960         dss_path = nsrv4->dss_pathlist;
9961         do {
9962                 int found = 0;
9963                 char *path = dss_path->path;
9964 
9965                 /* used only for non-HA so may not be removed */
9966                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9967                         dss_path = dss_path->next;
9968                         continue;
9969                 }
9970 
9971                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9972                         int cmpret;
9973                         char *newpath = rfs4_dss_newpaths[i];
9974 
9975                         /*
9976                          * Since nfsd has sorted rfs4_dss_newpaths for us,
9977                          * once the return from strcmp is negative we know
9978                          * we've passed the point where "path" should be,
9979                          * and can stop searching: "path" has been removed.
9980                          */
9981                         cmpret = strcmp(path, newpath);
9982                         if (cmpret < 0)
9983                                 break;
9984                         if (cmpret == 0) {
9985                                 found = 1;
9986                                 break;
9987                         }
9988                 }
9989 
9990                 if (found == 0) {
9991                         unsigned index = dss_path->index;
9992                         rfs4_servinst_t *sip = dss_path->sip;
9993                         rfs4_dss_path_t *path_next = dss_path->next;
9994 
9995                         /*
9996                          * This path has been removed.
9997                          * We must clear out the servinst reference to
9998                          * it, since it's now owned by another
9999                          * node: we should not attempt to touch it.
10000                          */
10001                         ASSERT(dss_path == sip->dss_paths[index]);
10002                         sip->dss_paths[index] = NULL;
10003 
10004                         /* remove from "currently-serving" list, and destroy */
10005                         remque(dss_path);
10006                         /* allow for NUL */
10007                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10008                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10009 
10010                         dss_path = path_next;
10011                 } else {
10012                         /* path was found; not removed */
10013                         dss_path = dss_path->next;
10014                 }
10015         } while (dss_path != nsrv4->dss_pathlist);
10016 
10017         /*
10018          * Now, look for added paths: RGs that have been failed-over
10019          * to this node.
10020          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10021          * for each path, check if it is on the "currently-serving"
10022          * dss_pathlist. If not, that RG path has been added.
10023          *
10024          * Note: we don't do duplicate detection here; nfsd does that for us.
10025          *
10026          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10027          * an upper bound for the size needed for added_paths[numadded_paths].
10028          */
10029 
10030         /* probably more space than we need, but guaranteed to be enough */
10031         if (rfs4_dss_numnewpaths > 0) {
10032                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10033                 added_paths = kmem_zalloc(sz, KM_SLEEP);
10034         }
10035 
10036         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10037         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10038                 int found = 0;
10039                 char *newpath = rfs4_dss_newpaths[i];
10040 
10041                 dss_path = nsrv4->dss_pathlist;
10042                 do {
10043                         char *path = dss_path->path;
10044 
10045                         /* used only for non-HA */
10046                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10047                                 dss_path = dss_path->next;
10048                                 continue;
10049                         }
10050 
10051                         if (strncmp(path, newpath, strlen(path)) == 0) {
10052                                 found = 1;
10053                                 break;
10054                         }
10055 
10056                         dss_path = dss_path->next;
10057                 } while (dss_path != nsrv4->dss_pathlist);
10058 
10059                 if (found == 0) {
10060                         added_paths[numadded_paths] = newpath;
10061                         numadded_paths++;
10062                 }
10063         }
10064 
10065         /* did we find any added paths? */
10066         if (numadded_paths > 0) {
10067 
10068                 /* create a new server instance, and start its grace period */
10069                 start_grace = 1;
10070                 /* CSTYLED */
10071                 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10072 
10073                 /* read in the stable storage state from these paths */
10074                 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10075 
10076                 /*
10077                  * Multiple failovers during a grace period will cause
10078                  * clients of the same resource group to be partitioned
10079                  * into different server instances, with different
10080                  * grace periods.  Since clients of the same resource
10081                  * group must be subject to the same grace period,
10082                  * we need to reset all currently active grace periods.
10083                  */
10084                 rfs4_grace_reset_all(nsrv4);
10085         }
10086 
10087         if (rfs4_dss_numnewpaths > 0)
10088                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10089 }