1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 /*
  29  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30  *      All Rights Reserved
  31  */
  32 
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36 #include <sys/cred.h>
  37 #include <sys/buf.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/uio.h>
  42 #include <sys/errno.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/statvfs.h>
  45 #include <sys/kmem.h>
  46 #include <sys/dirent.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/debug.h>
  49 #include <sys/systeminfo.h>
  50 #include <sys/flock.h>
  51 #include <sys/pathname.h>
  52 #include <sys/nbmlock.h>
  53 #include <sys/share.h>
  54 #include <sys/atomic.h>
  55 #include <sys/policy.h>
  56 #include <sys/fem.h>
  57 #include <sys/sdt.h>
  58 #include <sys/ddi.h>
  59 #include <sys/zone.h>
  60 
  61 #include <fs/fs_reparse.h>
  62 
  63 #include <rpc/types.h>
  64 #include <rpc/auth.h>
  65 #include <rpc/rpcsec_gss.h>
  66 #include <rpc/svc.h>
  67 
  68 #include <nfs/nfs.h>
  69 #include <nfs/export.h>
  70 #include <nfs/nfs_cmd.h>
  71 #include <nfs/lm.h>
  72 #include <nfs/nfs4.h>
  73 
  74 #include <sys/strsubr.h>
  75 #include <sys/strsun.h>
  76 
  77 #include <inet/common.h>
  78 #include <inet/ip.h>
  79 #include <inet/ip6.h>
  80 
  81 #include <sys/tsol/label.h>
  82 #include <sys/tsol/tndb.h>
  83 
/*
 * Tunables
 *
 * rfs4_maxlock_tries and rfs4_lock_delay are file-scope variables that
 * start out at the RFS4_MAXLOCK_TRIES / RFS4_LOCK_DELAY defaults.
 */
#define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define RFS4_LOCK_DELAY 10      /* Milliseconds */
static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
/* Both of the following are defined outside this file. */
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */

/* File-local helper; only its prototype appears in this part of the file. */
static int rdma_setup_read_data4(READ4args *, READ4res *);
  93 
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * Pre-increments (sp)->bits.chgseq in place and evaluates to the new value.
 * The macro itself does no locking; presumably callers already hold
 * whatever protects the stateid -- TODO confirm.
 */
#define next_stateid(sp) (++(sp)->bits.chgseq)
  98 
/*
 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 *      This is used to return NFS4ERR_TOOSMALL when clients specify
 *      maxcount that isn't large enough to hold the smallest possible
 *      XDR encoded dirent.
 *
 *          sizeof cookie (8 bytes) +
 *          sizeof name_len (4 bytes) +
 *          sizeof smallest (padded) name (4 bytes) +
 *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 *          sizeof attrlist4_len (4 bytes) +
 *          sizeof next boolean (4 bytes)
 *          = 36 bytes
 *
 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 * the smallest possible entry4 (assumes no attrs requested).
 *      sizeof nfsstat4 (4 bytes) +
 *      sizeof verifier4 (8 bytes) +
 *      sizeof entry4list bool (4 bytes) +
 *      sizeof entry4   (36 bytes) +
 *      sizeof eof bool  (4 bytes)
 *      = 56 bytes, given that NFS4_VERIFIER_SIZE is the 8 bytes listed above
 *
 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 *      for Solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 *      required for a given name length.  MAXNAMELEN is the maximum
 *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 *      macros are to allow for . and .. entries -- just a minor tweak to try
 *      and guarantee that buffer we give to VOP_READDIR will be large enough
 *      to hold ., .., and the largest possible Solaris dirent64.
 */
#define RFS4_MINLEN_ENTRY4 36
#define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
#define RFS4_MINLEN_RDDIR_BUF \
        (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 133 
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * Solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 *
 * The macro therefore charges three XDR units (the cookie plus the name
 * length, per the layout above) plus the name space that
 * DIRENT64_NAMELEN() derives from the entry's d_reclen.
 */
#define DIRENT64_TO_DIRCOUNT(dp) \
        (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 149 
/*
 * NOTE(review): the original comment said "Initialized in rfs4_srvrinit",
 * but rfs4_srvrinit() in this file never assigns rfs4_start_time; it must
 * be set elsewhere -- confirm and keep this comment in sync.
 */
time_t rfs4_start_time;

/* Allocated in rfs4_srvrinit(), released in rfs4_srvrfini(). */
static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */

u_longlong_t    nfs4_srv_caller_id;     /* from fs_new_caller_id() at init */
uint_t          nfs4_srv_vkey = 0;      /* key handed to vsd_create() at init */

verifier4       Write4verf;             /* built once in rfs4_srvrinit() */
verifier4       Readdir4verf;           /* not assigned in this part of file */
 159 
void    rfs4_init_compound_state(struct compound_state *);

/*
 * Forward declarations for the entries of the op dispatch table below.
 *
 * Every rfs4_op_<name>() handler takes the same four arguments: the
 * operation's arguments, the slot for its result, the RPC request and the
 * per-compound state.  The *_free() routines take only the result slot,
 * except nullfree(), which is declared with a caddr_t argument.
 */
static void     nullfree(caddr_t);
static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_create_free(nfs_resop4 *resop);
static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_getattr_free(nfs_resop4 *);
static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_getfh_free(nfs_resop4 *);
static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     lock_denied_free(nfs_resop4 *);
static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
                                struct svc_req *req, struct compound_state *cs);
static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_read_free(nfs_resop4 *);
static void     rfs4_op_readdir_free(nfs_resop4 *resop);
static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_readlink_free(nfs_resop4 *);
static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *req, struct compound_state *);
static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_secinfo_free(nfs_resop4 *);

/* Helpers that are not dispatch-table entries. */
static nfsstat4 check_open_access(uint32_t,
                                struct compound_state *, struct svc_req *);
nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void rfs4_ss_clid(rfs4_client_t *);
 252 
/*
 * translation table for attrs
 *
 * Working state passed to nfs4_ntov_table_init(), nfs4_ntov_table_free()
 * and do_rfs4_set_attrs(), whose prototypes follow this definition.
 */
struct nfs4_ntov_table {
        union nfs4_attr_u *na;          /* attribute values; allocated elsewhere */
        uint8_t amap[NFS4_MAXNUM_ATTRS]; /* one byte per possible attribute */
        int attrcnt;                    /* presumably entries in use -- confirm */
        bool_t vfsstat;                 /* NOTE(review): meaning not shown here */
};
 262 
/* Helpers operating on struct nfs4_ntov_table; defined further on. */
static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
                                    struct nfs4_svgetit_arg *sargp);

static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
                    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
                    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

/*
 * FEM monitors built by fem_create() in rfs4_srvrinit() from the read and
 * write delegation templates, and released in rfs4_srvrfini().
 */
fem_t           *deleg_rdops;
fem_t           *deleg_wrops;

/*
 * Server instances are chained through their prev/next pointers;
 * rfs4_cur_servinst is the newest one and the starting point of every
 * traversal in this file.
 */
rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
kmutex_t        rfs4_servinst_lock;     /* protects linked list */
int             rfs4_seen_first_compound;       /* set first time we see one */
 277 
/*
 * NFS4 op dispatch table
 *
 * Each rfsv4disp entry describes one operation: the handler to run, the
 * routine that releases whatever the handler allocated for its result, and
 * a set of flag bits.  Both function pointers are declared without a
 * parameter list, so the compiler does not check handler signatures against
 * the call sites.
 */

struct rfsv4disp {
        void    (*dis_proc)();          /* proc to call */
        void    (*dis_resfree)();       /* frees space allocated by proc */
        int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
};
 287 
/*
 * The table itself.  An entry's position is its operation number (noted in
 * the comment above each entry), so entries must never be reordered or
 * removed; the unused numbers 0-2 are routed to rfs4_op_illegal.
 */
static struct rfsv4disp rfsv4disptab[] = {
        /*
         * NFS VERSION 4
         */

        /* RFS_NULL = 0 */
        {rfs4_op_illegal, nullfree, 0},

        /* UNUSED = 1 */
        {rfs4_op_illegal, nullfree, 0},

        /* UNUSED = 2 */
        {rfs4_op_illegal, nullfree, 0},

        /* OP_ACCESS = 3 */
        {rfs4_op_access, nullfree, RPC_IDEMPOTENT},

        /* OP_CLOSE = 4 */
        {rfs4_op_close, nullfree, 0},

        /* OP_COMMIT = 5 */
        {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

        /* OP_CREATE = 6 */
        {rfs4_op_create, nullfree, 0},

        /* OP_DELEGPURGE = 7 */
        {rfs4_op_delegpurge, nullfree, 0},

        /* OP_DELEGRETURN = 8 */
        {rfs4_op_delegreturn, nullfree, 0},

        /* OP_GETATTR = 9 */
        {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

        /* OP_GETFH = 10 */
        {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

        /* OP_LINK = 11 */
        {rfs4_op_link, nullfree, 0},

        /* OP_LOCK = 12 */
        {rfs4_op_lock, lock_denied_free, 0},

        /* OP_LOCKT = 13 */
        {rfs4_op_lockt, lock_denied_free, 0},

        /* OP_LOCKU = 14 */
        {rfs4_op_locku, nullfree, 0},

        /* OP_LOOKUP = 15 */
        {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

        /* OP_LOOKUPP = 16 */
        {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

        /* OP_NVERIFY = 17 */
        {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

        /* OP_OPEN = 18 */
        {rfs4_op_open, rfs4_free_reply, 0},

        /* OP_OPENATTR = 19 */
        {rfs4_op_openattr, nullfree, 0},

        /* OP_OPEN_CONFIRM = 20 */
        {rfs4_op_open_confirm, nullfree, 0},

        /* OP_OPEN_DOWNGRADE = 21 */
        {rfs4_op_open_downgrade, nullfree, 0},

        /* OP_PUTFH = 22 */
        {rfs4_op_putfh, nullfree, RPC_ALL},

        /* OP_PUTPUBFH = 23 */
        {rfs4_op_putpubfh, nullfree, RPC_ALL},

        /* OP_PUTROOTFH = 24 */
        {rfs4_op_putrootfh, nullfree, RPC_ALL},

        /* OP_READ = 25 */
        {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

        /* OP_READDIR = 26 */
        {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

        /* OP_READLINK = 27 */
        {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

        /* OP_REMOVE = 28 */
        {rfs4_op_remove, nullfree, 0},

        /* OP_RENAME = 29 */
        {rfs4_op_rename, nullfree, 0},

        /* OP_RENEW = 30 */
        {rfs4_op_renew, nullfree, 0},

        /* OP_RESTOREFH = 31 */
        {rfs4_op_restorefh, nullfree, RPC_ALL},

        /* OP_SAVEFH = 32 */
        {rfs4_op_savefh, nullfree, RPC_ALL},

        /* OP_SECINFO = 33 */
        {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

        /* OP_SETATTR = 34 */
        {rfs4_op_setattr, nullfree, 0},

        /* OP_SETCLIENTID = 35 */
        {rfs4_op_setclientid, nullfree, 0},

        /* OP_SETCLIENTID_CONFIRM = 36 */
        {rfs4_op_setclientid_confirm, nullfree, 0},

        /* OP_VERIFY = 37 */
        {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

        /* OP_WRITE = 38 */
        {rfs4_op_write, nullfree, 0},

        /* OP_RELEASE_LOCKOWNER = 39 */
        {rfs4_op_release_lockowner, nullfree, 0},
};
 413 
/* Number of entries in rfsv4disptab. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index used for illegal operations: one past the last dispatch table
 * entry.  It is NOT a valid rfsv4disptab index; it lines up with the
 * trailing "rfs4_op_illegal" entry of the DEBUG-only rfs4_op_string array.
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 417 
 418 #ifdef DEBUG
 419 
 420 int             rfs4_fillone_debug = 0;
 421 int             rfs4_no_stub_access = 1;
 422 int             rfs4_rddir_debug = 0;
 423 
 424 static char    *rfs4_op_string[] = {
 425         "rfs4_op_null",
 426         "rfs4_op_1 unused",
 427         "rfs4_op_2 unused",
 428         "rfs4_op_access",
 429         "rfs4_op_close",
 430         "rfs4_op_commit",
 431         "rfs4_op_create",
 432         "rfs4_op_delegpurge",
 433         "rfs4_op_delegreturn",
 434         "rfs4_op_getattr",
 435         "rfs4_op_getfh",
 436         "rfs4_op_link",
 437         "rfs4_op_lock",
 438         "rfs4_op_lockt",
 439         "rfs4_op_locku",
 440         "rfs4_op_lookup",
 441         "rfs4_op_lookupp",
 442         "rfs4_op_nverify",
 443         "rfs4_op_open",
 444         "rfs4_op_openattr",
 445         "rfs4_op_open_confirm",
 446         "rfs4_op_open_downgrade",
 447         "rfs4_op_putfh",
 448         "rfs4_op_putpubfh",
 449         "rfs4_op_putrootfh",
 450         "rfs4_op_read",
 451         "rfs4_op_readdir",
 452         "rfs4_op_readlink",
 453         "rfs4_op_remove",
 454         "rfs4_op_rename",
 455         "rfs4_op_renew",
 456         "rfs4_op_restorefh",
 457         "rfs4_op_savefh",
 458         "rfs4_op_secinfo",
 459         "rfs4_op_setattr",
 460         "rfs4_op_setclientid",
 461         "rfs4_op_setclient_confirm",
 462         "rfs4_op_verify",
 463         "rfs4_op_write",
 464         "rfs4_op_release_lockowner",
 465         "rfs4_op_illegal"
 466 };
 467 #endif
 468 
void    rfs4_ss_chkclid(rfs4_client_t *);

extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);

extern void     rfs4_free_fs_locations4(fs_locations4 *);

/*
 * Step from one dirent64 to the record that follows it in the same buffer
 * by advancing d_reclen bytes.  Any earlier definition of nextdp is
 * discarded first so that this one is always the one in effect.
 */
#ifdef  nextdp
#undef nextdp
#endif
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 479 
/*
 * Template handed to fem_create() in rfs4_srvrinit() to build deleg_rdops.
 * It routes open, write, setattr, rwlock, space, setsecattr and vnevent to
 * the deleg_rd_* monitors.  The NULL pair terminates the list.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
        VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
        NULL,                   NULL
};
/*
 * Template for deleg_wrops.  Same operations as the read template, routed
 * to the deleg_wr_* monitors, plus read.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
        VOPNAME_READ,           { .femop_read = deleg_wr_read },
        VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
        NULL,                   NULL
};
 501 
 502 int
 503 rfs4_srvrinit(void)
 504 {
 505         timespec32_t verf;
 506         int error;
 507         extern void rfs4_attr_init();
 508         extern krwlock_t rfs4_deleg_policy_lock;
 509 
 510         /*
 511          * The following algorithm attempts to find a unique verifier
 512          * to be used as the write verifier returned from the server
 513          * to the client.  It is important that this verifier change
 514          * whenever the server reboots.  Of secondary importance, it
 515          * is important for the verifier to be unique between two
 516          * different servers.
 517          *
 518          * Thus, an attempt is made to use the system hostid and the
 519          * current time in seconds when the nfssrv kernel module is
 520          * loaded.  It is assumed that an NFS server will not be able
 521          * to boot and then to reboot in less than a second.  If the
 522          * hostid has not been set, then the current high resolution
 523          * time is used.  This will ensure different verifiers each
 524          * time the server reboots and minimize the chances that two
 525          * different servers will have the same verifier.
 526          * XXX - this is broken on LP64 kernels.
 527          */
 528         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 529         if (verf.tv_sec != 0) {
 530                 verf.tv_nsec = gethrestime_sec();
 531         } else {
 532                 timespec_t tverf;
 533 
 534                 gethrestime(&tverf);
 535                 verf.tv_sec = (time_t)tverf.tv_sec;
 536                 verf.tv_nsec = tverf.tv_nsec;
 537         }
 538 
 539         Write4verf = *(uint64_t *)&verf;
 540 
 541         rfs4_attr_init();
 542         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 543 
 544         /* Used to manage create/destroy of server state */
 545         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 546 
 547         /* Used to manage access to server instance linked list */
 548         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 549 
 550         /* Used to manage access to rfs4_deleg_policy */
 551         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 552 
 553         error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 554         if (error != 0) {
 555                 rfs4_disable_delegation();
 556         } else {
 557                 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 558                     &deleg_wrops);
 559                 if (error != 0) {
 560                         rfs4_disable_delegation();
 561                         fem_free(deleg_rdops);
 562                 }
 563         }
 564 
 565         nfs4_srv_caller_id = fs_new_caller_id();
 566 
 567         lockt_sysid = lm_alloc_sysidt();
 568 
 569         vsd_create(&nfs4_srv_vkey, NULL);
 570 
 571         return (0);
 572 }
 573 
 574 void
 575 rfs4_srvrfini(void)
 576 {
 577         extern krwlock_t rfs4_deleg_policy_lock;
 578 
 579         if (lockt_sysid != LM_NOSYSID) {
 580                 lm_free_sysidt(lockt_sysid);
 581                 lockt_sysid = LM_NOSYSID;
 582         }
 583 
 584         mutex_destroy(&rfs4_deleg_lock);
 585         mutex_destroy(&rfs4_state_lock);
 586         rw_destroy(&rfs4_deleg_policy_lock);
 587 
 588         fem_free(deleg_rdops);
 589         fem_free(deleg_wrops);
 590 }
 591 
 592 void
 593 rfs4_init_compound_state(struct compound_state *cs)
 594 {
 595         bzero(cs, sizeof (*cs));
 596         cs->cont = TRUE;
 597         cs->access = CS_ACCESS_DENIED;
 598         cs->deleg = FALSE;
 599         cs->mandlock = FALSE;
 600         cs->fh.nfs_fh4_val = cs->fhbuf;
 601 }
 602 
 603 void
 604 rfs4_grace_start(rfs4_servinst_t *sip)
 605 {
 606         rw_enter(&sip->rwlock, RW_WRITER);
 607         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 608         sip->grace_period = rfs4_grace_period;
 609         rw_exit(&sip->rwlock);
 610 }
 611 
 612 /*
 613  * returns true if the instance's grace period has never been started
 614  */
 615 int
 616 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 617 {
 618         time_t start_time;
 619 
 620         rw_enter(&sip->rwlock, RW_READER);
 621         start_time = sip->start_time;
 622         rw_exit(&sip->rwlock);
 623 
 624         return (start_time == 0);
 625 }
 626 
 627 /*
 628  * Indicates if server instance is within the
 629  * grace period.
 630  */
 631 int
 632 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 633 {
 634         time_t grace_expiry;
 635 
 636         rw_enter(&sip->rwlock, RW_READER);
 637         grace_expiry = sip->start_time + sip->grace_period;
 638         rw_exit(&sip->rwlock);
 639 
 640         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 641 }
 642 
 643 int
 644 rfs4_clnt_in_grace(rfs4_client_t *cp)
 645 {
 646         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 647 
 648         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 649 }
 650 
 651 /*
 652  * reset all currently active grace periods
 653  */
 654 void
 655 rfs4_grace_reset_all(void)
 656 {
 657         rfs4_servinst_t *sip;
 658 
 659         mutex_enter(&rfs4_servinst_lock);
 660         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 661                 if (rfs4_servinst_in_grace(sip))
 662                         rfs4_grace_start(sip);
 663         mutex_exit(&rfs4_servinst_lock);
 664 }
 665 
 666 /*
 667  * start any new instances' grace periods
 668  */
 669 void
 670 rfs4_grace_start_new(void)
 671 {
 672         rfs4_servinst_t *sip;
 673 
 674         mutex_enter(&rfs4_servinst_lock);
 675         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 676                 if (rfs4_servinst_grace_new(sip))
 677                         rfs4_grace_start(sip);
 678         mutex_exit(&rfs4_servinst_lock);
 679 }
 680 
 681 static rfs4_dss_path_t *
 682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 683 {
 684         size_t len;
 685         rfs4_dss_path_t *dss_path;
 686 
 687         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 688 
 689         /*
 690          * Take a copy of the string, since the original may be overwritten.
 691          * Sadly, no strdup() in the kernel.
 692          */
 693         /* allow for NUL */
 694         len = strlen(path) + 1;
 695         dss_path->path = kmem_alloc(len, KM_SLEEP);
 696         (void) strlcpy(dss_path->path, path, len);
 697 
 698         /* associate with servinst */
 699         dss_path->sip = sip;
 700         dss_path->index = index;
 701 
 702         /*
 703          * Add to list of served paths.
 704          * No locking required, as we're only ever called at startup.
 705          */
 706         if (rfs4_dss_pathlist == NULL) {
 707                 /* this is the first dss_path_t */
 708 
 709                 /* needed for insque/remque */
 710                 dss_path->next = dss_path->prev = dss_path;
 711 
 712                 rfs4_dss_pathlist = dss_path;
 713         } else {
 714                 insque(dss_path, rfs4_dss_pathlist);
 715         }
 716 
 717         return (dss_path);
 718 }
 719 
 720 /*
 721  * Create a new server instance, and make it the currently active instance.
 722  * Note that starting the grace period too early will reduce the clients'
 723  * recovery window.
 724  */
 725 void
 726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 727 {
 728         unsigned i;
 729         rfs4_servinst_t *sip;
 730         rfs4_oldstate_t *oldstate;
 731 
 732         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 733         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 734 
 735         sip->start_time = (time_t)0;
 736         sip->grace_period = (time_t)0;
 737         sip->next = NULL;
 738         sip->prev = NULL;
 739 
 740         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 741         /*
 742          * This initial dummy entry is required to setup for insque/remque.
 743          * It must be skipped over whenever the list is traversed.
 744          */
 745         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 746         /* insque/remque require initial list entry to be self-terminated */
 747         oldstate->next = oldstate;
 748         oldstate->prev = oldstate;
 749         sip->oldstate = oldstate;
 750 
 751 
 752         sip->dss_npaths = dss_npaths;
 753         sip->dss_paths = kmem_alloc(dss_npaths *
 754             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 755 
 756         for (i = 0; i < dss_npaths; i++) {
 757                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 758         }
 759 
 760         mutex_enter(&rfs4_servinst_lock);
 761         if (rfs4_cur_servinst != NULL) {
 762                 /* add to linked list */
 763                 sip->prev = rfs4_cur_servinst;
 764                 rfs4_cur_servinst->next = sip;
 765         }
 766         if (start_grace)
 767                 rfs4_grace_start(sip);
 768         /* make the new instance "current" */
 769         rfs4_cur_servinst = sip;
 770 
 771         mutex_exit(&rfs4_servinst_lock);
 772 }
 773 
 774 /*
 775  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 776  * all instances directly.
 777  */
 778 void
 779 rfs4_servinst_destroy_all(void)
 780 {
 781         rfs4_servinst_t *sip, *prev, *current;
 782 #ifdef DEBUG
 783         int n = 0;
 784 #endif
 785 
 786         mutex_enter(&rfs4_servinst_lock);
 787         ASSERT(rfs4_cur_servinst != NULL);
 788         current = rfs4_cur_servinst;
 789         rfs4_cur_servinst = NULL;
 790         for (sip = current; sip != NULL; sip = prev) {
 791                 prev = sip->prev;
 792                 rw_destroy(&sip->rwlock);
 793                 if (sip->oldstate)
 794                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 795                 if (sip->dss_paths)
 796                         kmem_free(sip->dss_paths,
 797                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 798                 kmem_free(sip, sizeof (rfs4_servinst_t));
 799 #ifdef DEBUG
 800                 n++;
 801 #endif
 802         }
 803         mutex_exit(&rfs4_servinst_lock);
 804 }
 805 
 806 /*
 807  * Assign the current server instance to a client_t.
 808  * Should be called with cp->rc_dbe held.
 809  */
 810 void
 811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 812 {
 813         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 814 
 815         /*
 816          * The lock ensures that if the current instance is in the process
 817          * of changing, we will see the new one.
 818          */
 819         mutex_enter(&rfs4_servinst_lock);
 820         cp->rc_server_instance = sip;
 821         mutex_exit(&rfs4_servinst_lock);
 822 }
 823 
 824 rfs4_servinst_t *
 825 rfs4_servinst(rfs4_client_t *cp)
 826 {
 827         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 828 
 829         return (cp->rc_server_instance);
 830 }
 831 
/*
 * Result-free routine for operations whose handler allocates nothing for
 * its result: deliberately does nothing.  Used as the dis_resfree member
 * of most dispatch table entries.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
 837 
 838 /*
 839  * This is a fall-through for invalid or not implemented (yet) ops
 840  */
 841 /* ARGSUSED */
 842 static void
 843 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 844     struct compound_state *cs)
 845 {
 846         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 847 }
 848 
 849 /*
 850  * Check if the security flavor, nfsnum, is in the flavor_list.
 851  */
 852 bool_t
 853 in_flavor_list(int nfsnum, int *flavor_list, int count)
 854 {
 855         int i;
 856 
 857         for (i = 0; i < count; i++) {
 858                 if (nfsnum == flavor_list[i])
 859                         return (TRUE);
 860         }
 861         return (FALSE);
 862 }
 863 
 864 /*
 865  * Used by rfs4_op_secinfo to get the security information from the
 866  * export structure associated with the component.
 867  */
 868 /* ARGSUSED */
 869 static nfsstat4
 870 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 871 {
 872         int error, different_export = 0;
 873         vnode_t *dvp, *vp;
 874         struct exportinfo *exi = NULL;
 875         fid_t fid;
 876         uint_t count, i;
 877         secinfo4 *resok_val;
 878         struct secinfo *secp;
 879         seconfig_t *si;
 880         bool_t did_traverse = FALSE;
 881         int dotdot, walk;
 882 
 883         dvp = cs->vp;
 884         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 885 
 886         /*
 887          * If dotdotting, then need to check whether it's above the
 888          * root of a filesystem, or above an export point.
 889          */
 890         if (dotdot) {
 891 
 892                 /*
 893                  * If dotdotting at the root of a filesystem, then
 894                  * need to traverse back to the mounted-on filesystem
 895                  * and do the dotdot lookup there.
 896                  */
 897                 if (cs->vp->v_flag & VROOT) {
 898 
 899                         /*
 900                          * If at the system root, then can
 901                          * go up no further.
 902                          */
 903                         if (VN_CMP(dvp, rootdir))
 904                                 return (puterrno4(ENOENT));
 905 
 906                         /*
 907                          * Traverse back to the mounted-on filesystem
 908                          */
 909                         dvp = untraverse(cs->vp);
 910 
 911                         /*
 912                          * Set the different_export flag so we remember
 913                          * to pick up a new exportinfo entry for
 914                          * this new filesystem.
 915                          */
 916                         different_export = 1;
 917                 } else {
 918 
 919                         /*
 920                          * If dotdotting above an export point then set
 921                          * the different_export to get new export info.
 922                          */
 923                         different_export = nfs_exported(cs->exi, cs->vp);
 924                 }
 925         }
 926 
 927         /*
 928          * Get the vnode for the component "nm".
 929          */
 930         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 931             NULL, NULL, NULL);
 932         if (error)
 933                 return (puterrno4(error));
 934 
 935         /*
 936          * If the vnode is in a pseudo filesystem, or if the security flavor
 937          * used in the request is valid but not an explicitly shared flavor,
 938          * or the access bit indicates that this is a limited access,
 939          * check whether this vnode is visible.
 940          */
 941         if (!different_export &&
 942             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 943             cs->access & CS_ACCESS_LIMITED)) {
 944                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 945                         VN_RELE(vp);
 946                         return (puterrno4(ENOENT));
 947                 }
 948         }
 949 
 950         /*
 951          * If it's a mountpoint, then traverse it.
 952          */
 953         if (vn_ismntpt(vp)) {
 954                 if ((error = traverse(&vp)) != 0) {
 955                         VN_RELE(vp);
 956                         return (puterrno4(error));
 957                 }
 958                 /* remember that we had to traverse mountpoint */
 959                 did_traverse = TRUE;
 960                 different_export = 1;
 961         } else if (vp->v_vfsp != dvp->v_vfsp) {
 962                 /*
 963                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 964                  * then vp is probably an LOFS object.  We don't need the
 965                  * realvp, we just need to know that we might have crossed
 966                  * a server fs boundary and need to call checkexport4.
 967                  * (LOFS lookup hides server fs mountpoints, and actually calls
 968                  * traverse)
 969                  */
 970                 different_export = 1;
 971         }
 972 
 973         /*
 974          * Get the export information for it.
 975          */
 976         if (different_export) {
 977 
 978                 bzero(&fid, sizeof (fid));
 979                 fid.fid_len = MAXFIDSZ;
 980                 error = vop_fid_pseudo(vp, &fid);
 981                 if (error) {
 982                         VN_RELE(vp);
 983                         return (puterrno4(error));
 984                 }
 985 
 986                 if (dotdot)
 987                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 988                 else
 989                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 990 
 991                 if (exi == NULL) {
 992                         if (did_traverse == TRUE) {
 993                                 /*
 994                                  * If this vnode is a mounted-on vnode,
 995                                  * but the mounted-on file system is not
 996                                  * exported, send back the secinfo for
 997                                  * the exported node that the mounted-on
 998                                  * vnode lives in.
 999                                  */
1000                                 exi = cs->exi;
1001                         } else {
1002                                 VN_RELE(vp);
1003                                 return (puterrno4(EACCES));
1004                         }
1005                 }
1006         } else {
1007                 exi = cs->exi;
1008         }
1009         ASSERT(exi != NULL);
1010 
1011 
1012         /*
1013          * Create the secinfo result based on the security information
1014          * from the exportinfo structure (exi).
1015          *
1016          * Return all flavors for a pseudo node.
1017          * For a real export node, return the flavor that the client
1018          * has access with.
1019          */
1020         ASSERT(RW_LOCK_HELD(&exported_lock));
1021         if (PSEUDO(exi)) {
1022                 count = exi->exi_export.ex_seccnt; /* total sec count */
1023                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1024                 secp = exi->exi_export.ex_secinfo;
1025 
1026                 for (i = 0; i < count; i++) {
1027                         si = &secp[i].s_secinfo;
1028                         resok_val[i].flavor = si->sc_rpcnum;
1029                         if (resok_val[i].flavor == RPCSEC_GSS) {
1030                                 rpcsec_gss_info *info;
1031 
1032                                 info = &resok_val[i].flavor_info;
1033                                 info->qop = si->sc_qop;
1034                                 info->service = (rpc_gss_svc_t)si->sc_service;
1035 
1036                                 /* get oid opaque data */
1037                                 info->oid.sec_oid4_len =
1038                                     si->sc_gss_mech_type->length;
1039                                 info->oid.sec_oid4_val = kmem_alloc(
1040                                     si->sc_gss_mech_type->length, KM_SLEEP);
1041                                 bcopy(
1042                                     si->sc_gss_mech_type->elements,
1043                                     info->oid.sec_oid4_val,
1044                                     info->oid.sec_oid4_len);
1045                         }
1046                 }
1047                 resp->SECINFO4resok_len = count;
1048                 resp->SECINFO4resok_val = resok_val;
1049         } else {
1050                 int ret_cnt = 0, k = 0;
1051                 int *flavor_list;
1052 
1053                 count = exi->exi_export.ex_seccnt; /* total sec count */
1054                 secp = exi->exi_export.ex_secinfo;
1055 
1056                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1057                 /* find out which flavors to return */
1058                 for (i = 0; i < count; i ++) {
1059                         int access, flavor, perm;
1060 
1061                         flavor = secp[i].s_secinfo.sc_nfsnum;
1062                         perm = secp[i].s_flags;
1063 
1064                         access = nfsauth4_secinfo_access(exi, cs->req,
1065                             flavor, perm, cs->basecr);
1066 
1067                         if (! (access & NFSAUTH_DENIED) &&
1068                             ! (access & NFSAUTH_WRONGSEC)) {
1069                                 flavor_list[ret_cnt] = flavor;
1070                                 ret_cnt++;
1071                         }
1072                 }
1073 
1074                 /* Create the returning SECINFO value */
1075                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1076 
1077                 for (i = 0; i < count; i++) {
1078                         /*
1079                          * If the flavor is in the flavor list,
1080                          * fill in resok_val.
1081                          */
1082                         si = &secp[i].s_secinfo;
1083                         if (in_flavor_list(si->sc_nfsnum,
1084                             flavor_list, ret_cnt)) {
1085                                 resok_val[k].flavor = si->sc_rpcnum;
1086                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1087                                         rpcsec_gss_info *info;
1088 
1089                                         info = &resok_val[k].flavor_info;
1090                                         info->qop = si->sc_qop;
1091                                         info->service = (rpc_gss_svc_t)
1092                                             si->sc_service;
1093 
1094                                         /* get oid opaque data */
1095                                         info->oid.sec_oid4_len =
1096                                             si->sc_gss_mech_type->length;
1097                                         info->oid.sec_oid4_val = kmem_alloc(
1098                                             si->sc_gss_mech_type->length,
1099                                             KM_SLEEP);
1100                                         bcopy(si->sc_gss_mech_type->elements,
1101                                             info->oid.sec_oid4_val,
1102                                             info->oid.sec_oid4_len);
1103                                 }
1104                                 k++;
1105                         }
1106                         if (k >= ret_cnt)
1107                                 break;
1108                 }
1109                 resp->SECINFO4resok_len = ret_cnt;
1110                 resp->SECINFO4resok_val = resok_val;
1111                 kmem_free(flavor_list, count * sizeof (int));
1112         }
1113 
1114         VN_RELE(vp);
1115         return (NFS4_OK);
1116 }
1117 
1118 /*
1119  * SECINFO (Operation 33): Obtain required security information on
1120  * the component name in the format of (security-mechanism-oid, qop, service)
1121  * triplets.
1122  */
1123 /* ARGSUSED */
1124 static void
1125 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1126     struct compound_state *cs)
1127 {
1128         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1129         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1130         utf8string *utfnm = &args->name;
1131         uint_t len;
1132         char *nm;
1133         struct sockaddr *ca;
1134         char *name = NULL;
1135         nfsstat4 status = NFS4_OK;
1136 
1137         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1138             SECINFO4args *, args);
1139 
1140         /*
1141          * Current file handle (cfh) should have been set before getting
1142          * into this function. If not, return error.
1143          */
1144         if (cs->vp == NULL) {
1145                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1146                 goto out;
1147         }
1148 
1149         if (cs->vp->v_type != VDIR) {
1150                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1151                 goto out;
1152         }
1153 
1154         /*
1155          * Verify the component name. If failed, error out, but
1156          * do not error out if the component name is a "..".
1157          * SECINFO will return its parents secinfo data for SECINFO "..".
1158          */
1159         status = utf8_dir_verify(utfnm);
1160         if (status != NFS4_OK) {
1161                 if (utfnm->utf8string_len != 2 ||
1162                     utfnm->utf8string_val[0] != '.' ||
1163                     utfnm->utf8string_val[1] != '.') {
1164                         *cs->statusp = resp->status = status;
1165                         goto out;
1166                 }
1167         }
1168 
1169         nm = utf8_to_str(utfnm, &len, NULL);
1170         if (nm == NULL) {
1171                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1172                 goto out;
1173         }
1174 
1175         if (len > MAXNAMELEN) {
1176                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1177                 kmem_free(nm, len);
1178                 goto out;
1179         }
1180 
1181         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1182         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1183             MAXPATHLEN  + 1);
1184 
1185         if (name == NULL) {
1186                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1187                 kmem_free(nm, len);
1188                 goto out;
1189         }
1190 
1191 
1192         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1193 
1194         if (name != nm)
1195                 kmem_free(name, MAXPATHLEN + 1);
1196         kmem_free(nm, len);
1197 
1198 out:
1199         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1200             SECINFO4res *, resp);
1201 }
1202 
1203 /*
1204  * Free SECINFO result.
1205  */
1206 /* ARGSUSED */
1207 static void
1208 rfs4_op_secinfo_free(nfs_resop4 *resop)
1209 {
1210         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1211         int count, i;
1212         secinfo4 *resok_val;
1213 
1214         /* If this is not an Ok result, nothing to free. */
1215         if (resp->status != NFS4_OK) {
1216                 return;
1217         }
1218 
1219         count = resp->SECINFO4resok_len;
1220         resok_val = resp->SECINFO4resok_val;
1221 
1222         for (i = 0; i < count; i++) {
1223                 if (resok_val[i].flavor == RPCSEC_GSS) {
1224                         rpcsec_gss_info *info;
1225 
1226                         info = &resok_val[i].flavor_info;
1227                         kmem_free(info->oid.sec_oid4_val,
1228                             info->oid.sec_oid4_len);
1229                 }
1230         }
1231         kmem_free(resok_val, count * sizeof (secinfo4));
1232         resp->SECINFO4resok_len = 0;
1233         resp->SECINFO4resok_val = NULL;
1234 }
1235 
1236 /* ARGSUSED */
1237 static void
1238 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1239     struct compound_state *cs)
1240 {
1241         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1242         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1243         int error;
1244         vnode_t *vp;
1245         struct vattr va;
1246         int checkwriteperm;
1247         cred_t *cr = cs->cr;
1248         bslabel_t *clabel, *slabel;
1249         ts_label_t *tslabel;
1250         boolean_t admin_low_client;
1251 
1252         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1253             ACCESS4args *, args);
1254 
1255 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1256         if (cs->access == CS_ACCESS_DENIED) {
1257                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1258                 goto out;
1259         }
1260 #endif
1261         if (cs->vp == NULL) {
1262                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1263                 goto out;
1264         }
1265 
1266         ASSERT(cr != NULL);
1267 
1268         vp = cs->vp;
1269 
1270         /*
1271          * If the file system is exported read only, it is not appropriate
1272          * to check write permissions for regular files and directories.
1273          * Special files are interpreted by the client, so the underlying
1274          * permissions are sent back to the client for interpretation.
1275          */
1276         if (rdonly4(req, cs) &&
1277             (vp->v_type == VREG || vp->v_type == VDIR))
1278                 checkwriteperm = 0;
1279         else
1280                 checkwriteperm = 1;
1281 
1282         /*
1283          * XXX
1284          * We need the mode so that we can correctly determine access
1285          * permissions relative to a mandatory lock file.  Access to
1286          * mandatory lock files is denied on the server, so it might
1287          * as well be reflected to the server during the open.
1288          */
1289         va.va_mask = AT_MODE;
1290         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1291         if (error) {
1292                 *cs->statusp = resp->status = puterrno4(error);
1293                 goto out;
1294         }
1295         resp->access = 0;
1296         resp->supported = 0;
1297 
1298         if (is_system_labeled()) {
1299                 ASSERT(req->rq_label != NULL);
1300                 clabel = req->rq_label;
1301                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1302                     "got client label from request(1)",
1303                     struct svc_req *, req);
1304                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1305                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1306                                 *cs->statusp = resp->status = puterrno4(EACCES);
1307                                 goto out;
1308                         }
1309                         slabel = label2bslabel(tslabel);
1310                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1311                             char *, "got server label(1) for vp(2)",
1312                             bslabel_t *, slabel, vnode_t *, vp);
1313 
1314                         admin_low_client = B_FALSE;
1315                 } else
1316                         admin_low_client = B_TRUE;
1317         }
1318 
1319         if (args->access & ACCESS4_READ) {
1320                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1321                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1322                     (!is_system_labeled() || admin_low_client ||
1323                     bldominates(clabel, slabel)))
1324                         resp->access |= ACCESS4_READ;
1325                 resp->supported |= ACCESS4_READ;
1326         }
1327         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1328                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1329                 if (!error && (!is_system_labeled() || admin_low_client ||
1330                     bldominates(clabel, slabel)))
1331                         resp->access |= ACCESS4_LOOKUP;
1332                 resp->supported |= ACCESS4_LOOKUP;
1333         }
1334         if (checkwriteperm &&
1335             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1336                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1337                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1338                     (!is_system_labeled() || admin_low_client ||
1339                     blequal(clabel, slabel)))
1340                         resp->access |=
1341                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1342                 resp->supported |=
1343                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1344         }
1345 
1346         if (checkwriteperm &&
1347             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1348                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1349                 if (!error && (!is_system_labeled() || admin_low_client ||
1350                     blequal(clabel, slabel)))
1351                         resp->access |= ACCESS4_DELETE;
1352                 resp->supported |= ACCESS4_DELETE;
1353         }
1354         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1355                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1356                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1357                     (!is_system_labeled() || admin_low_client ||
1358                     bldominates(clabel, slabel)))
1359                         resp->access |= ACCESS4_EXECUTE;
1360                 resp->supported |= ACCESS4_EXECUTE;
1361         }
1362 
1363         if (is_system_labeled() && !admin_low_client)
1364                 label_rele(tslabel);
1365 
1366         *cs->statusp = resp->status = NFS4_OK;
1367 out:
1368         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1369             ACCESS4res *, resp);
1370 }
1371 
1372 /* ARGSUSED */
1373 static void
1374 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375     struct compound_state *cs)
1376 {
1377         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379         int error;
1380         vnode_t *vp = cs->vp;
1381         cred_t *cr = cs->cr;
1382         vattr_t va;
1383 
1384         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385             COMMIT4args *, args);
1386 
1387         if (vp == NULL) {
1388                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389                 goto out;
1390         }
1391         if (cs->access == CS_ACCESS_DENIED) {
1392                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1393                 goto out;
1394         }
1395 
1396         if (args->offset + args->count < args->offset) {
1397                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1398                 goto out;
1399         }
1400 
1401         va.va_mask = AT_UID;
1402         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1403 
1404         /*
1405          * If we can't get the attributes, then we can't do the
1406          * right access checking.  So, we'll fail the request.
1407          */
1408         if (error) {
1409                 *cs->statusp = resp->status = puterrno4(error);
1410                 goto out;
1411         }
1412         if (rdonly4(req, cs)) {
1413                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1414                 goto out;
1415         }
1416 
1417         if (vp->v_type != VREG) {
1418                 if (vp->v_type == VDIR)
1419                         resp->status = NFS4ERR_ISDIR;
1420                 else
1421                         resp->status = NFS4ERR_INVAL;
1422                 *cs->statusp = resp->status;
1423                 goto out;
1424         }
1425 
1426         if (crgetuid(cr) != va.va_uid &&
1427             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1428                 *cs->statusp = resp->status = puterrno4(error);
1429                 goto out;
1430         }
1431 
1432         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433 
1434         if (error) {
1435                 *cs->statusp = resp->status = puterrno4(error);
1436                 goto out;
1437         }
1438 
1439         *cs->statusp = resp->status = NFS4_OK;
1440         resp->writeverf = Write4verf;
1441 out:
1442         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443             COMMIT4res *, resp);
1444 }
1445 
1446 /*
1447  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1448  * was completed. It does the nfsv4 create for special files.
1449  */
1450 /* ARGSUSED */
1451 static vnode_t *
1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1453     struct compound_state *cs, vattr_t *vap, char *nm)
1454 {
1455         int error;
1456         cred_t *cr = cs->cr;
1457         vnode_t *dvp = cs->vp;
1458         vnode_t *vp = NULL;
1459         int mode;
1460         enum vcexcl excl;
1461 
1462         switch (args->type) {
1463         case NF4CHR:
1464         case NF4BLK:
1465                 if (secpolicy_sys_devices(cr) != 0) {
1466                         *cs->statusp = resp->status = NFS4ERR_PERM;
1467                         return (NULL);
1468                 }
1469                 if (args->type == NF4CHR)
1470                         vap->va_type = VCHR;
1471                 else
1472                         vap->va_type = VBLK;
1473                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1474                     args->ftype4_u.devdata.specdata2);
1475                 vap->va_mask |= AT_RDEV;
1476                 break;
1477         case NF4SOCK:
1478                 vap->va_type = VSOCK;
1479                 break;
1480         case NF4FIFO:
1481                 vap->va_type = VFIFO;
1482                 break;
1483         default:
1484                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1485                 return (NULL);
1486         }
1487 
1488         /*
1489          * Must specify the mode.
1490          */
1491         if (!(vap->va_mask & AT_MODE)) {
1492                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1493                 return (NULL);
1494         }
1495 
1496         excl = EXCL;
1497 
1498         mode = 0;
1499 
1500         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1501         if (error) {
1502                 *cs->statusp = resp->status = puterrno4(error);
1503                 return (NULL);
1504         }
1505         return (vp);
1506 }
1507 
1508 /*
1509  * nfsv4 create is used to create non-regular files. For regular files,
1510  * use nfsv4 open.
1511  */
1512 /* ARGSUSED */
1513 static void
1514 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1515     struct compound_state *cs)
1516 {
1517         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1518         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1519         int error;
1520         struct vattr bva, iva, iva2, ava, *vap;
1521         cred_t *cr = cs->cr;
1522         vnode_t *dvp = cs->vp;
1523         vnode_t *vp = NULL;
1524         vnode_t *realvp;
1525         char *nm, *lnm;
1526         uint_t len, llen;
1527         int syncval = 0;
1528         struct nfs4_svgetit_arg sarg;
1529         struct nfs4_ntov_table ntov;
1530         struct statvfs64 sb;
1531         nfsstat4 status;
1532         struct sockaddr *ca;
1533         char *name = NULL;
1534         char *lname = NULL;
1535 
1536         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1537             CREATE4args *, args);
1538 
1539         resp->attrset = 0;
1540 
1541         if (dvp == NULL) {
1542                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1543                 goto out;
1544         }
1545 
1546         /*
1547          * If there is an unshared filesystem mounted on this vnode,
1548          * do not allow to create an object in this directory.
1549          */
1550         if (vn_ismntpt(dvp)) {
1551                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1552                 goto out;
1553         }
1554 
1555         /* Verify that type is correct */
1556         switch (args->type) {
1557         case NF4LNK:
1558         case NF4BLK:
1559         case NF4CHR:
1560         case NF4SOCK:
1561         case NF4FIFO:
1562         case NF4DIR:
1563                 break;
1564         default:
1565                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1566                 goto out;
1567         };
1568 
1569         if (cs->access == CS_ACCESS_DENIED) {
1570                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1571                 goto out;
1572         }
1573         if (dvp->v_type != VDIR) {
1574                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1575                 goto out;
1576         }
1577         status = utf8_dir_verify(&args->objname);
1578         if (status != NFS4_OK) {
1579                 *cs->statusp = resp->status = status;
1580                 goto out;
1581         }
1582 
1583         if (rdonly4(req, cs)) {
1584                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1585                 goto out;
1586         }
1587 
1588         /*
1589          * Name of newly created object
1590          */
1591         nm = utf8_to_fn(&args->objname, &len, NULL);
1592         if (nm == NULL) {
1593                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1594                 goto out;
1595         }
1596 
1597         if (len > MAXNAMELEN) {
1598                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1599                 kmem_free(nm, len);
1600                 goto out;
1601         }
1602 
1603         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1604         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1605             MAXPATHLEN  + 1);
1606 
1607         if (name == NULL) {
1608                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1609                 kmem_free(nm, len);
1610                 goto out;
1611         }
1612 
1613         resp->attrset = 0;
1614 
1615         sarg.sbp = &sb;
1616         sarg.is_referral = B_FALSE;
1617         nfs4_ntov_table_init(&ntov);
1618 
1619         status = do_rfs4_set_attrs(&resp->attrset,
1620             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1621 
1622         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1623                 status = NFS4ERR_INVAL;
1624 
1625         if (status != NFS4_OK) {
1626                 *cs->statusp = resp->status = status;
1627                 if (name != nm)
1628                         kmem_free(name, MAXPATHLEN + 1);
1629                 kmem_free(nm, len);
1630                 nfs4_ntov_table_free(&ntov, &sarg);
1631                 resp->attrset = 0;
1632                 goto out;
1633         }
1634 
1635         /* Get "before" change value */
1636         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1637         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1638         if (error) {
1639                 *cs->statusp = resp->status = puterrno4(error);
1640                 if (name != nm)
1641                         kmem_free(name, MAXPATHLEN + 1);
1642                 kmem_free(nm, len);
1643                 nfs4_ntov_table_free(&ntov, &sarg);
1644                 resp->attrset = 0;
1645                 goto out;
1646         }
1647         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1648 
1649         vap = sarg.vap;
1650 
1651         /*
1652          * Set the default initial values for attributes when the parent
1653          * directory does not have the VSUID/VSGID bit set and they have
1654          * not been specified in createattrs.
1655          */
1656         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1657                 vap->va_uid = crgetuid(cr);
1658                 vap->va_mask |= AT_UID;
1659         }
1660         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1661                 vap->va_gid = crgetgid(cr);
1662                 vap->va_mask |= AT_GID;
1663         }
1664 
1665         vap->va_mask |= AT_TYPE;
1666         switch (args->type) {
1667         case NF4DIR:
1668                 vap->va_type = VDIR;
1669                 if ((vap->va_mask & AT_MODE) == 0) {
1670                         vap->va_mode = 0700; /* default: owner rwx only */
1671                         vap->va_mask |= AT_MODE;
1672                 }
1673                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1674                 if (error)
1675                         break;
1676 
1677                 /*
1678                  * Get the initial "after" sequence number, if it fails,
1679                  * set to zero
1680                  */
1681                 iva.va_mask = AT_SEQ;
1682                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1683                         iva.va_seq = 0;
1684                 break;
1685         case NF4LNK:
1686                 vap->va_type = VLNK;
1687                 if ((vap->va_mask & AT_MODE) == 0) {
1688                         vap->va_mode = 0700; /* default: owner rwx only */
1689                         vap->va_mask |= AT_MODE;
1690                 }
1691 
1692                 /*
1693                  * symlink names must be treated as data
1694                  */
1695                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1696                     &llen, NULL);
1697 
1698                 if (lnm == NULL) {
1699                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1700                         if (name != nm)
1701                                 kmem_free(name, MAXPATHLEN + 1);
1702                         kmem_free(nm, len);
1703                         nfs4_ntov_table_free(&ntov, &sarg);
1704                         resp->attrset = 0;
1705                         goto out;
1706                 }
1707 
1708                 if (llen > MAXPATHLEN) {
1709                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1710                         if (name != nm)
1711                                 kmem_free(name, MAXPATHLEN + 1);
1712                         kmem_free(nm, len);
1713                         kmem_free(lnm, llen);
1714                         nfs4_ntov_table_free(&ntov, &sarg);
1715                         resp->attrset = 0;
1716                         goto out;
1717                 }
1718 
1719                 lname = nfscmd_convname(ca, cs->exi, lnm,
1720                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1721 
1722                 if (lname == NULL) {
1723                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1724                         if (name != nm)
1725                                 kmem_free(name, MAXPATHLEN + 1);
1726                         kmem_free(nm, len);
1727                         kmem_free(lnm, llen);
1728                         nfs4_ntov_table_free(&ntov, &sarg);
1729                         resp->attrset = 0;
1730                         goto out;
1731                 }
1732 
1733                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1734                 if (lname != lnm)
1735                         kmem_free(lname, MAXPATHLEN + 1);
1736                 kmem_free(lnm, llen);
1737                 if (error)
1738                         break;
1739 
1740                 /*
1741                  * Get the initial "after" sequence number, if it fails,
1742                  * set to zero
1743                  */
1744                 iva.va_mask = AT_SEQ;
1745                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1746                         iva.va_seq = 0;
1747 
1748                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1749                     NULL, NULL, NULL);
1750                 if (error)
1751                         break;
1752 
1753                 /*
1754                  * va_seq is not safe over VOP calls, check it again
1755                  * if it has changed zero out iva to force atomic = FALSE.
1756                  */
1757                 iva2.va_mask = AT_SEQ;
1758                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1759                     iva2.va_seq != iva.va_seq)
1760                         iva.va_seq = 0;
1761                 break;
1762         default:
1763                 /*
1764                  * probably a special file.
1765                  */
1766                 if ((vap->va_mask & AT_MODE) == 0) {
1767                         vap->va_mode = 0600; /* default: owner rw only */
1768                         vap->va_mask |= AT_MODE;
1769                 }
1770                 syncval = FNODSYNC;
1771                 /*
1772                  * We know this will only generate one VOP call
1773                  */
1774                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1775 
1776                 if (vp == NULL) {
1777                         if (name != nm)
1778                                 kmem_free(name, MAXPATHLEN + 1);
1779                         kmem_free(nm, len);
1780                         nfs4_ntov_table_free(&ntov, &sarg);
1781                         resp->attrset = 0;
1782                         goto out;
1783                 }
1784 
1785                 /*
1786                  * Get the initial "after" sequence number, if it fails,
1787                  * set to zero
1788                  */
1789                 iva.va_mask = AT_SEQ;
1790                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1791                         iva.va_seq = 0;
1792 
1793                 break;
1794         }
1795         if (name != nm)
1796                 kmem_free(name, MAXPATHLEN + 1);
1797         kmem_free(nm, len);
1798 
1799         if (error) {
1800                 *cs->statusp = resp->status = puterrno4(error);
1801         }
1802 
1803         /*
1804          * Force modified data and metadata out to stable storage.
1805          */
1806         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1807 
1808         if (resp->status != NFS4_OK) {
1809                 if (vp != NULL)
1810                         VN_RELE(vp);
1811                 nfs4_ntov_table_free(&ntov, &sarg);
1812                 resp->attrset = 0;
1813                 goto out;
1814         }
1815 
1816         /*
1817          * Finish setup of cinfo response, "before" value already set.
1818          * Get "after" change value, if it fails, simply return the
1819          * before value.
1820          */
1821         ava.va_mask = AT_CTIME|AT_SEQ;
1822         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1823                 ava.va_ctime = bva.va_ctime;
1824                 ava.va_seq = 0;
1825         }
1826         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1827 
1828         /*
1829          * True verification that object was created with correct
1830          * attrs is impossible.  The attrs could have been changed
1831          * immediately after object creation.  If attributes did
1832          * not verify, the only recourse for the server is to
1833          * destroy the object.  Maybe if some attrs (like gid)
1834          * are set incorrectly, the object should be destroyed;
1835          * however, seems bad as a default policy.  Do we really
1836          * want to destroy an object over one of the times not
1837          * verifying correctly?  For these reasons, the server
1838          * currently sets bits in attrset for createattrs
1839          * that were set; however, no verification is done.
1840          *
1841          * vmask_to_nmask accounts for vattr bits set on create
1842          *      [do_rfs4_set_attrs() only sets resp bits for
1843          *       non-vattr/vfs bits.]
1844          * Mask off any bits set by default so as not to return
1845          * more attrset bits than were requested in createattrs
1846          */
1847         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1848         resp->attrset &= args->createattrs.attrmask;
1849         nfs4_ntov_table_free(&ntov, &sarg);
1850 
1851         error = makefh4(&cs->fh, vp, cs->exi);
1852         if (error) {
1853                 *cs->statusp = resp->status = puterrno4(error);
1854         }
1855 
1856         /*
1857          * The cinfo.atomic = TRUE only if we got no errors, we have
1858          * non-zero va_seq's, and it has incremented by exactly one
1859          * during the creation and it didn't change during the VOP_LOOKUP
1860          * or VOP_FSYNC.
1861          */
1862         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1863             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1864                 resp->cinfo.atomic = TRUE;
1865         else
1866                 resp->cinfo.atomic = FALSE;
1867 
1868         /*
1869          * Force modified metadata out to stable storage.
1870          *
1871          * if a underlying vp exists, pass it to VOP_FSYNC
1872          */
1873         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1874                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1875         else
1876                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1877 
1878         if (resp->status != NFS4_OK) {
1879                 VN_RELE(vp);
1880                 goto out;
1881         }
1882         if (cs->vp)
1883                 VN_RELE(cs->vp);
1884 
1885         cs->vp = vp;
1886         *cs->statusp = resp->status = NFS4_OK;
1887 out:
1888         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1889             CREATE4res *, resp);
1890 }
1891 
1892 /*ARGSUSED*/
1893 static void
1894 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1895     struct compound_state *cs)
1896 {
1897         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1898             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1899 
1900         rfs4_op_inval(argop, resop, req, cs);
1901 
1902         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1903             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1904 }
1905 
1906 /*ARGSUSED*/
1907 static void
1908 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1909     struct compound_state *cs)
1910 {
1911         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1912         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1913         rfs4_deleg_state_t *dsp;
1914         nfsstat4 status;
1915 
1916         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1917             DELEGRETURN4args *, args);
1918 
1919         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1920         resp->status = *cs->statusp = status;
1921         if (status != NFS4_OK)
1922                 goto out;
1923 
1924         /* Ensure specified filehandle matches */
1925         if (cs->vp != dsp->rds_finfo->rf_vp) {
1926                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1927         } else
1928                 rfs4_return_deleg(dsp, FALSE);
1929 
1930         rfs4_update_lease(dsp->rds_client);
1931 
1932         rfs4_deleg_state_rele(dsp);
1933 out:
1934         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1935             DELEGRETURN4res *, resp);
1936 }
1937 
1938 /*
1939  * Check to see if a given "flavor" is an explicitly shared flavor.
1940  * The assumption of this routine is the "flavor" is already a valid
1941  * flavor in the secinfo list of "exi".
1942  *
1943  *      e.g.
1944  *              # share -o sec=flavor1 /export
1945  *              # share -o sec=flavor2 /export/home
1946  *
1947  *              flavor2 is not an explicitly shared flavor for /export,
1948  *              however it is in the secinfo list for /export thru the
1949  *              server namespace setup.
1950  */
1951 int
1952 is_exported_sec(int flavor, struct exportinfo *exi)
1953 {
1954         int     i;
1955         struct secinfo *sp;
1956 
1957         sp = exi->exi_export.ex_secinfo;
1958         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1959                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1960                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1961                         return (SEC_REF_EXPORTED(&sp[i]));
1962                 }
1963         }
1964 
1965         /* Should not reach this point based on the assumption */
1966         return (0);
1967 }
1968 
1969 /*
1970  * Check if the security flavor used in the request matches what is
1971  * required at the export point or at the root pseudo node (exi_root).
1972  *
1973  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1974  *
1975  */
1976 static int
1977 secinfo_match_or_authnone(struct compound_state *cs)
1978 {
1979         int     i;
1980         struct secinfo *sp;
1981 
1982         /*
1983          * Check cs->nfsflavor (from the request) against
1984          * the current export data in cs->exi.
1985          */
1986         sp = cs->exi->exi_export.ex_secinfo;
1987         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1988                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1989                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1990                         return (1);
1991         }
1992 
1993         return (0);
1994 }
1995 
1996 /*
1997  * Check the access authority for the client and return the correct error.
1998  */
1999 nfsstat4
2000 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2001 {
2002         int     authres;
2003 
2004         /*
2005          * First, check if the security flavor used in the request
2006          * are among the flavors set in the server namespace.
2007          */
2008         if (!secinfo_match_or_authnone(cs)) {
2009                 *cs->statusp = NFS4ERR_WRONGSEC;
2010                 return (*cs->statusp);
2011         }
2012 
2013         authres = checkauth4(cs, req);
2014 
2015         if (authres > 0) {
2016                 *cs->statusp = NFS4_OK;
2017                 if (! (cs->access & CS_ACCESS_LIMITED))
2018                         cs->access = CS_ACCESS_OK;
2019         } else if (authres == 0) {
2020                 *cs->statusp = NFS4ERR_ACCESS;
2021         } else if (authres == -2) {
2022                 *cs->statusp = NFS4ERR_WRONGSEC;
2023         } else {
2024                 *cs->statusp = NFS4ERR_DELAY;
2025         }
2026         return (*cs->statusp);
2027 }
2028 
2029 /*
2030  * bitmap4_to_attrmask is called by getattr and readdir.
2031  * It sets up the vattr mask and determines whether vfsstat call is needed
2032  * based on the input bitmap.
2033  * Returns nfsv4 status.
2034  */
2035 static nfsstat4
2036 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2037 {
2038         int i;
2039         uint_t  va_mask;
2040         struct statvfs64 *sbp = sargp->sbp;
2041 
2042         sargp->sbp = NULL;
2043         sargp->flag = 0;
2044         sargp->rdattr_error = NFS4_OK;
2045         sargp->mntdfid_set = FALSE;
2046         if (sargp->cs->vp)
2047                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2048                     FH4_ATTRDIR | FH4_NAMEDATTR);
2049         else
2050                 sargp->xattr = 0;
2051 
2052         /*
2053          * Set rdattr_error_req to true if return error per
2054          * failed entry rather than fail the readdir.
2055          */
2056         if (breq & FATTR4_RDATTR_ERROR_MASK)
2057                 sargp->rdattr_error_req = 1;
2058         else
2059                 sargp->rdattr_error_req = 0;
2060 
2061         /*
2062          * generate the va_mask
2063          * Handle the easy cases first
2064          */
2065         switch (breq) {
2066         case NFS4_NTOV_ATTR_MASK:
2067                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2068                 return (NFS4_OK);
2069 
2070         case NFS4_FS_ATTR_MASK:
2071                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2072                 sargp->sbp = sbp;
2073                 return (NFS4_OK);
2074 
2075         case NFS4_NTOV_ATTR_CACHE_MASK:
2076                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2077                 return (NFS4_OK);
2078 
2079         case FATTR4_LEASE_TIME_MASK:
2080                 sargp->vap->va_mask = 0;
2081                 return (NFS4_OK);
2082 
2083         default:
2084                 va_mask = 0;
2085                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2086                         if ((breq & nfs4_ntov_map[i].fbit) &&
2087                             nfs4_ntov_map[i].vbit)
2088                                 va_mask |= nfs4_ntov_map[i].vbit;
2089                 }
2090 
2091                 /*
2092                  * Check is vfsstat is needed
2093                  */
2094                 if (breq & NFS4_FS_ATTR_MASK)
2095                         sargp->sbp = sbp;
2096 
2097                 sargp->vap->va_mask = va_mask;
2098                 return (NFS4_OK);
2099         }
2100         /* NOTREACHED */
2101 }
2102 
2103 /*
2104  * bitmap4_get_sysattrs is called by getattr and readdir.
2105  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2106  * Returns nfsv4 status.
2107  */
2108 static nfsstat4
2109 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2110 {
2111         int error;
2112         struct compound_state *cs = sargp->cs;
2113         vnode_t *vp = cs->vp;
2114 
2115         if (sargp->sbp != NULL) {
2116                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2117                         sargp->sbp = NULL;   /* to identify error */
2118                         return (puterrno4(error));
2119                 }
2120         }
2121 
2122         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2123 }
2124 
2125 static void
2126 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2127 {
2128         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2129             KM_SLEEP);
2130         ntovp->attrcnt = 0;
2131         ntovp->vfsstat = FALSE;
2132 }
2133 
2134 static void
2135 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2136     struct nfs4_svgetit_arg *sargp)
2137 {
2138         int i;
2139         union nfs4_attr_u *na;
2140         uint8_t *amap;
2141 
2142         /*
2143          * XXX Should do the same checks for whether the bit is set
2144          */
2145         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2146             i < ntovp->attrcnt; i++, na++, amap++) {
2147                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2148                     NFS4ATTR_FREEIT, sargp, na);
2149         }
2150         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2151                 /*
2152                  * xdr_free for getattr will be done later
2153                  */
2154                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2155                     i < ntovp->attrcnt; i++, na++, amap++) {
2156                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2157                 }
2158         }
2159         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2160 }
2161 
2162 /*
2163  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2164  */
2165 static nfsstat4
2166 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2167     struct nfs4_svgetit_arg *sargp)
2168 {
2169         int error = 0;
2170         int i, k;
2171         struct nfs4_ntov_table ntov;
2172         XDR xdr;
2173         ulong_t xdr_size;
2174         char *xdr_attrs;
2175         nfsstat4 status = NFS4_OK;
2176         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2177         union nfs4_attr_u *na;
2178         uint8_t *amap;
2179 
2180         sargp->op = NFS4ATTR_GETIT;
2181         sargp->flag = 0;
2182 
2183         fattrp->attrmask = 0;
2184         /* if no bits requested, then return empty fattr4 */
2185         if (breq == 0) {
2186                 fattrp->attrlist4_len = 0;
2187                 fattrp->attrlist4 = NULL;
2188                 return (NFS4_OK);
2189         }
2190 
2191         /*
2192          * return NFS4ERR_INVAL when client requests write-only attrs
2193          */
2194         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2195                 return (NFS4ERR_INVAL);
2196 
2197         nfs4_ntov_table_init(&ntov);
2198         na = ntov.na;
2199         amap = ntov.amap;
2200 
2201         /*
2202          * Now loop to get or verify the attrs
2203          */
2204         for (i = 0; i < nfs4_ntov_map_size; i++) {
2205                 if (breq & nfs4_ntov_map[i].fbit) {
2206                         if ((*nfs4_ntov_map[i].sv_getit)(
2207                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2208 
2209                                 error = (*nfs4_ntov_map[i].sv_getit)(
2210                                     NFS4ATTR_GETIT, sargp, na);
2211 
2212                                 /*
2213                                  * Possible error values:
2214                                  * >0 if sv_getit failed to
2215                                  * get the attr; 0 if succeeded;
2216                                  * <0 if rdattr_error and the
2217                                  * attribute cannot be returned.
2218                                  */
2219                                 if (error && !(sargp->rdattr_error_req))
2220                                         goto done;
2221                                 /*
2222                                  * If error then just for entry
2223                                  */
2224                                 if (error == 0) {
2225                                         fattrp->attrmask |=
2226                                             nfs4_ntov_map[i].fbit;
2227                                         *amap++ =
2228                                             (uint8_t)nfs4_ntov_map[i].nval;
2229                                         na++;
2230                                         (ntov.attrcnt)++;
2231                                 } else if ((error > 0) &&
2232                                     (sargp->rdattr_error == NFS4_OK)) {
2233                                         sargp->rdattr_error = puterrno4(error);
2234                                 }
2235                                 error = 0;
2236                         }
2237                 }
2238         }
2239 
2240         /*
2241          * If rdattr_error was set after the return value for it was assigned,
2242          * update it.
2243          */
2244         if (prev_rdattr_error != sargp->rdattr_error) {
2245                 na = ntov.na;
2246                 amap = ntov.amap;
2247                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2248                         k = *amap;
2249                         if (k < FATTR4_RDATTR_ERROR) {
2250                                 continue;
2251                         }
2252                         if ((k == FATTR4_RDATTR_ERROR) &&
2253                             ((*nfs4_ntov_map[k].sv_getit)(
2254                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2255 
2256                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2257                                     NFS4ATTR_GETIT, sargp, na);
2258                         }
2259                         break;
2260                 }
2261         }
2262 
2263         xdr_size = 0;
2264         na = ntov.na;
2265         amap = ntov.amap;
2266         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2267                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2268         }
2269 
2270         fattrp->attrlist4_len = xdr_size;
2271         if (xdr_size) {
2272                 /* freed by rfs4_op_getattr_free() */
2273                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2274 
2275                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2276 
2277                 na = ntov.na;
2278                 amap = ntov.amap;
2279                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2280                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2281                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2282                                     int, *amap);
2283                                 status = NFS4ERR_SERVERFAULT;
2284                                 break;
2285                         }
2286                 }
2287                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2288         } else {
2289                 fattrp->attrlist4 = NULL;
2290         }
2291 done:
2292 
2293         nfs4_ntov_table_free(&ntov, sargp);
2294 
2295         if (error != 0)
2296                 status = puterrno4(error);
2297 
2298         return (status);
2299 }
2300 
2301 /* ARGSUSED */
2302 static void
2303 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2304     struct compound_state *cs)
2305 {
2306         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2307         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2308         struct nfs4_svgetit_arg sarg;
2309         struct statvfs64 sb;
2310         nfsstat4 status;
2311 
2312         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2313             GETATTR4args *, args);
2314 
2315         if (cs->vp == NULL) {
2316                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317                 goto out;
2318         }
2319 
2320         if (cs->access == CS_ACCESS_DENIED) {
2321                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2322                 goto out;
2323         }
2324 
2325         sarg.sbp = &sb;
2326         sarg.cs = cs;
2327         sarg.is_referral = B_FALSE;
2328 
2329         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2330         if (status == NFS4_OK) {
2331 
2332                 status = bitmap4_get_sysattrs(&sarg);
2333                 if (status == NFS4_OK) {
2334 
2335                         /* Is this a referral? */
2336                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2337                                 /* Older V4 Solaris client sees a link */
2338                                 if (client_is_downrev(req))
2339                                         sarg.vap->va_type = VLNK;
2340                                 else
2341                                         sarg.is_referral = B_TRUE;
2342                         }
2343 
2344                         status = do_rfs4_op_getattr(args->attr_request,
2345                             &resp->obj_attributes, &sarg);
2346                 }
2347         }
2348         *cs->statusp = resp->status = status;
2349 out:
2350         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2351             GETATTR4res *, resp);
2352 }
2353 
2354 static void
2355 rfs4_op_getattr_free(nfs_resop4 *resop)
2356 {
2357         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2358 
2359         nfs4_fattr4_free(&resp->obj_attributes);
2360 }
2361 
2362 /* ARGSUSED */
2363 static void
2364 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2365     struct compound_state *cs)
2366 {
2367         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2368 
2369         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2370 
2371         if (cs->vp == NULL) {
2372                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2373                 goto out;
2374         }
2375         if (cs->access == CS_ACCESS_DENIED) {
2376                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2377                 goto out;
2378         }
2379 
2380         /* check for reparse point at the share point */
2381         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2382                 /* it's all bad */
2383                 cs->exi->exi_moved = 1;
2384                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2385                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2386                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2387                 return;
2388         }
2389 
2390         /* check for reparse point at vp */
2391         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2392                 /* it's not all bad */
2393                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2394                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2395                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2396                 return;
2397         }
2398 
2399         resp->object.nfs_fh4_val =
2400             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2401         nfs_fh4_copy(&cs->fh, &resp->object);
2402         *cs->statusp = resp->status = NFS4_OK;
2403 out:
2404         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2405             GETFH4res *, resp);
2406 }
2407 
2408 static void
2409 rfs4_op_getfh_free(nfs_resop4 *resop)
2410 {
2411         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2412 
2413         if (resp->status == NFS4_OK &&
2414             resp->object.nfs_fh4_val != NULL) {
2415                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2416                 resp->object.nfs_fh4_val = NULL;
2417                 resp->object.nfs_fh4_len = 0;
2418         }
2419 }
2420 
2421 /*
2422  * illegal: args: void
2423  *          res : status (NFS4ERR_OP_ILLEGAL)
2424  */
2425 /* ARGSUSED */
2426 static void
2427 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2428     struct svc_req *req, struct compound_state *cs)
2429 {
2430         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2431 
2432         resop->resop = OP_ILLEGAL;
2433         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2434 }
2435 
2436 /*
2437  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2438  *       res: status. If success - CURRENT_FH unchanged, return change_info
2439  */
2440 /* ARGSUSED */
2441 static void
2442 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2443     struct compound_state *cs)
2444 {
2445         LINK4args *args = &argop->nfs_argop4_u.oplink;
2446         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2447         int error;
2448         vnode_t *vp;
2449         vnode_t *dvp;
2450         struct vattr bdva, idva, adva;
2451         char *nm;
2452         uint_t  len;
2453         struct sockaddr *ca;
2454         char *name = NULL;
2455         nfsstat4 status;
2456 
2457         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2458             LINK4args *, args);
2459 
2460         /* SAVED_FH: source object */
2461         vp = cs->saved_vp;
2462         if (vp == NULL) {
2463                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464                 goto out;
2465         }
2466 
2467         /* CURRENT_FH: target directory */
2468         dvp = cs->vp;
2469         if (dvp == NULL) {
2470                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2471                 goto out;
2472         }
2473 
2474         /*
2475          * If there is a non-shared filesystem mounted on this vnode,
2476          * do not allow to link any file in this directory.
2477          */
2478         if (vn_ismntpt(dvp)) {
2479                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2480                 goto out;
2481         }
2482 
2483         if (cs->access == CS_ACCESS_DENIED) {
2484                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2485                 goto out;
2486         }
2487 
2488         /* Check source object's type validity */
2489         if (vp->v_type == VDIR) {
2490                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2491                 goto out;
2492         }
2493 
2494         /* Check target directory's type */
2495         if (dvp->v_type != VDIR) {
2496                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2497                 goto out;
2498         }
2499 
2500         if (cs->saved_exi != cs->exi) {
2501                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2502                 goto out;
2503         }
2504 
2505         status = utf8_dir_verify(&args->newname);
2506         if (status != NFS4_OK) {
2507                 *cs->statusp = resp->status = status;
2508                 goto out;
2509         }
2510 
2511         nm = utf8_to_fn(&args->newname, &len, NULL);
2512         if (nm == NULL) {
2513                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2514                 goto out;
2515         }
2516 
2517         if (len > MAXNAMELEN) {
2518                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2519                 kmem_free(nm, len);
2520                 goto out;
2521         }
2522 
2523         if (rdonly4(req, cs)) {
2524                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2525                 kmem_free(nm, len);
2526                 goto out;
2527         }
2528 
2529         /* Get "before" change value */
2530         bdva.va_mask = AT_CTIME|AT_SEQ;
2531         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2532         if (error) {
2533                 *cs->statusp = resp->status = puterrno4(error);
2534                 kmem_free(nm, len);
2535                 goto out;
2536         }
2537 
2538         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2539         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2540             MAXPATHLEN  + 1);
2541 
2542         if (name == NULL) {
2543                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2544                 kmem_free(nm, len);
2545                 goto out;
2546         }
2547 
2548         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2549 
2550         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2551 
2552         if (nm != name)
2553                 kmem_free(name, MAXPATHLEN + 1);
2554         kmem_free(nm, len);
2555 
2556         /*
2557          * Get the initial "after" sequence number, if it fails, set to zero
2558          */
2559         idva.va_mask = AT_SEQ;
2560         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2561                 idva.va_seq = 0;
2562 
2563         /*
2564          * Force modified data and metadata out to stable storage.
2565          */
2566         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2567         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2568 
2569         if (error) {
2570                 *cs->statusp = resp->status = puterrno4(error);
2571                 goto out;
2572         }
2573 
2574         /*
2575          * Get "after" change value, if it fails, simply return the
2576          * before value.
2577          */
2578         adva.va_mask = AT_CTIME|AT_SEQ;
2579         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2580                 adva.va_ctime = bdva.va_ctime;
2581                 adva.va_seq = 0;
2582         }
2583 
2584         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2585 
2586         /*
2587          * The cinfo.atomic = TRUE only if we have
2588          * non-zero va_seq's, and it has incremented by exactly one
2589          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2590          */
2591         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2592             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2593                 resp->cinfo.atomic = TRUE;
2594         else
2595                 resp->cinfo.atomic = FALSE;
2596 
2597         *cs->statusp = resp->status = NFS4_OK;
2598 out:
2599         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2600             LINK4res *, resp);
2601 }
2602 
2603 /*
2604  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2605  */
2606 
2607 /* ARGSUSED */
2608 static nfsstat4
2609 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2610 {
2611         int error;
2612         int different_export = 0;
2613         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2614         struct exportinfo *exi = NULL, *pre_exi = NULL;
2615         nfsstat4 stat;
2616         fid_t fid;
2617         int attrdir, dotdot, walk;
2618         bool_t is_newvp = FALSE;
2619 
2620         if (cs->vp->v_flag & V_XATTRDIR) {
2621                 attrdir = 1;
2622                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2623         } else {
2624                 attrdir = 0;
2625                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2626         }
2627 
2628         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629 
2630         /*
2631          * If dotdotting, then need to check whether it's
2632          * above the root of a filesystem, or above an
2633          * export point.
2634          */
2635         if (dotdot) {
2636 
2637                 /*
2638                  * If dotdotting at the root of a filesystem, then
2639                  * need to traverse back to the mounted-on filesystem
2640                  * and do the dotdot lookup there.
2641                  */
2642                 if (cs->vp->v_flag & VROOT) {
2643 
2644                         /*
2645                          * If at the system root, then can
2646                          * go up no further.
2647                          */
2648                         if (VN_CMP(cs->vp, rootdir))
2649                                 return (puterrno4(ENOENT));
2650 
2651                         /*
2652                          * Traverse back to the mounted-on filesystem
2653                          */
2654                         cs->vp = untraverse(cs->vp);
2655 
2656                         /*
2657                          * Set the different_export flag so we remember
2658                          * to pick up a new exportinfo entry for
2659                          * this new filesystem.
2660                          */
2661                         different_export = 1;
2662                 } else {
2663 
2664                         /*
2665                          * If dotdotting above an export point then set
2666                          * the different_export to get new export info.
2667                          */
2668                         different_export = nfs_exported(cs->exi, cs->vp);
2669                 }
2670         }
2671 
2672         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2673             NULL, NULL, NULL);
2674         if (error)
2675                 return (puterrno4(error));
2676 
2677         /*
2678          * If the vnode is in a pseudo filesystem, check whether it is visible.
2679          *
2680          * XXX if the vnode is a symlink and it is not visible in
2681          * a pseudo filesystem, return ENOENT (not following symlink).
2682          * V4 client can not mount such symlink. This is a regression
2683          * from V2/V3.
2684          *
2685          * In the same exported filesystem, if the security flavor used
2686          * is not an explicitly shared flavor, limit the view to the visible
2687          * list entries only. This is not a WRONGSEC case because it's already
2688          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2689          */
2690         if (!different_export &&
2691             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2692             cs->access & CS_ACCESS_LIMITED)) {
2693                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2694                         VN_RELE(vp);
2695                         return (puterrno4(ENOENT));
2696                 }
2697         }
2698 
2699         /*
2700          * If it's a mountpoint, then traverse it.
2701          */
2702         if (vn_ismntpt(vp)) {
2703                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2704                 pre_tvp = vp;           /* save pre-traversed vnode     */
2705 
2706                 /*
2707                  * hold pre_tvp to counteract rele by traverse.  We will
2708                  * need pre_tvp below if checkexport4 fails
2709                  */
2710                 VN_HOLD(pre_tvp);
2711                 if ((error = traverse(&vp)) != 0) {
2712                         VN_RELE(vp);
2713                         VN_RELE(pre_tvp);
2714                         return (puterrno4(error));
2715                 }
2716                 different_export = 1;
2717         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2718                 /*
2719                  * The vfsp comparison is to handle the case where
2720                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2721                  * and NFS is unaware of local fs transistions because
2722                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2723                  * the dir and the obj returned by lookup will have different
2724                  * vfs ptrs.
2725                  */
2726                 different_export = 1;
2727         }
2728 
2729         if (different_export) {
2730 
2731                 bzero(&fid, sizeof (fid));
2732                 fid.fid_len = MAXFIDSZ;
2733                 error = vop_fid_pseudo(vp, &fid);
2734                 if (error) {
2735                         VN_RELE(vp);
2736                         if (pre_tvp)
2737                                 VN_RELE(pre_tvp);
2738                         return (puterrno4(error));
2739                 }
2740 
2741                 if (dotdot)
2742                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2743                 else
2744                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2745 
2746                 if (exi == NULL) {
2747                         if (pre_tvp) {
2748                                 /*
2749                                  * If this vnode is a mounted-on vnode,
2750                                  * but the mounted-on file system is not
2751                                  * exported, send back the filehandle for
2752                                  * the mounted-on vnode, not the root of
2753                                  * the mounted-on file system.
2754                                  */
2755                                 VN_RELE(vp);
2756                                 vp = pre_tvp;
2757                                 exi = pre_exi;
2758                         } else {
2759                                 VN_RELE(vp);
2760                                 return (puterrno4(EACCES));
2761                         }
2762                 } else if (pre_tvp) {
2763                         /* we're done with pre_tvp now. release extra hold */
2764                         VN_RELE(pre_tvp);
2765                 }
2766 
2767                 cs->exi = exi;
2768 
2769                 /*
2770                  * Now we do a checkauth4. The reason is that
2771                  * this client/user may not have access to the new
2772                  * exported file system, and if they do,
2773                  * the client/user may be mapped to a different uid.
2774                  *
2775                  * We start with a new cr, because the checkauth4 done
2776                  * in the PUT*FH operation over wrote the cred's uid,
2777                  * gid, etc, and we want the real thing before calling
2778                  * checkauth4()
2779                  */
2780                 crfree(cs->cr);
2781                 cs->cr = crdup(cs->basecr);
2782 
2783                 oldvp = cs->vp;
2784                 cs->vp = vp;
2785                 is_newvp = TRUE;
2786 
2787                 stat = call_checkauth4(cs, req);
2788                 if (stat != NFS4_OK) {
2789                         VN_RELE(cs->vp);
2790                         cs->vp = oldvp;
2791                         return (stat);
2792                 }
2793         }
2794 
2795         /*
2796          * After various NFS checks, do a label check on the path
2797          * component. The label on this path should either be the
2798          * global zone's label or a zone's label. We are only
2799          * interested in the zone's label because exported files
2800          * in global zone is accessible (though read-only) to
2801          * clients. The exportability/visibility check is already
2802          * done before reaching this code.
2803          */
2804         if (is_system_labeled()) {
2805                 bslabel_t *clabel;
2806 
2807                 ASSERT(req->rq_label != NULL);
2808                 clabel = req->rq_label;
2809                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2810                     "got client label from request(1)", struct svc_req *, req);
2811 
2812                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2813                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2814                             cs->exi)) {
2815                                 error = EACCES;
2816                                 goto err_out;
2817                         }
2818                 } else {
2819                         /*
2820                          * We grant access to admin_low label clients
2821                          * only if the client is trusted, i.e. also
2822                          * running Solaris Trusted Extension.
2823                          */
2824                         struct sockaddr *ca;
2825                         int             addr_type;
2826                         void            *ipaddr;
2827                         tsol_tpc_t      *tp;
2828 
2829                         ca = (struct sockaddr *)svc_getrpccaller(
2830                             req->rq_xprt)->buf;
2831                         if (ca->sa_family == AF_INET) {
2832                                 addr_type = IPV4_VERSION;
2833                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2834                         } else if (ca->sa_family == AF_INET6) {
2835                                 addr_type = IPV6_VERSION;
2836                                 ipaddr = &((struct sockaddr_in6 *)
2837                                     ca)->sin6_addr;
2838                         }
2839                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2840                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2841                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2842                             SUN_CIPSO) {
2843                                 if (tp != NULL)
2844                                         TPC_RELE(tp);
2845                                 error = EACCES;
2846                                 goto err_out;
2847                         }
2848                         TPC_RELE(tp);
2849                 }
2850         }
2851 
2852         error = makefh4(&cs->fh, vp, cs->exi);
2853 
2854 err_out:
2855         if (error) {
2856                 if (is_newvp) {
2857                         VN_RELE(cs->vp);
2858                         cs->vp = oldvp;
2859                 } else
2860                         VN_RELE(vp);
2861                 return (puterrno4(error));
2862         }
2863 
2864         if (!is_newvp) {
2865                 if (cs->vp)
2866                         VN_RELE(cs->vp);
2867                 cs->vp = vp;
2868         } else if (oldvp)
2869                 VN_RELE(oldvp);
2870 
2871         /*
2872          * if did lookup on attrdir and didn't lookup .., set named
2873          * attr fh flag
2874          */
2875         if (attrdir && ! dotdot)
2876                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2877 
2878         /* Assume false for now, open proc will set this */
2879         cs->mandlock = FALSE;
2880 
2881         return (NFS4_OK);
2882 }
2883 
2884 /* ARGSUSED */
2885 static void
2886 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2887     struct compound_state *cs)
2888 {
2889         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2890         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2891         char *nm;
2892         uint_t len;
2893         struct sockaddr *ca;
2894         char *name = NULL;
2895         nfsstat4 status;
2896 
2897         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2898             LOOKUP4args *, args);
2899 
2900         if (cs->vp == NULL) {
2901                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2902                 goto out;
2903         }
2904 
2905         if (cs->vp->v_type == VLNK) {
2906                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2907                 goto out;
2908         }
2909 
2910         if (cs->vp->v_type != VDIR) {
2911                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2912                 goto out;
2913         }
2914 
2915         status = utf8_dir_verify(&args->objname);
2916         if (status != NFS4_OK) {
2917                 *cs->statusp = resp->status = status;
2918                 goto out;
2919         }
2920 
2921         nm = utf8_to_str(&args->objname, &len, NULL);
2922         if (nm == NULL) {
2923                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2924                 goto out;
2925         }
2926 
2927         if (len > MAXNAMELEN) {
2928                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2929                 kmem_free(nm, len);
2930                 goto out;
2931         }
2932 
2933         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2934         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2935             MAXPATHLEN  + 1);
2936 
2937         if (name == NULL) {
2938                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2939                 kmem_free(nm, len);
2940                 goto out;
2941         }
2942 
2943         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2944 
2945         if (name != nm)
2946                 kmem_free(name, MAXPATHLEN + 1);
2947         kmem_free(nm, len);
2948 
2949 out:
2950         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2951             LOOKUP4res *, resp);
2952 }
2953 
2954 /* ARGSUSED */
2955 static void
2956 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2957     struct compound_state *cs)
2958 {
2959         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2960 
2961         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2962 
2963         if (cs->vp == NULL) {
2964                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2965                 goto out;
2966         }
2967 
2968         if (cs->vp->v_type != VDIR) {
2969                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2970                 goto out;
2971         }
2972 
2973         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2974 
2975         /*
2976          * From NFSV4 Specification, LOOKUPP should not check for
2977          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2978          */
2979         if (resp->status == NFS4ERR_WRONGSEC) {
2980                 *cs->statusp = resp->status = NFS4_OK;
2981         }
2982 
2983 out:
2984         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2985             LOOKUPP4res *, resp);
2986 }
2987 
2988 
2989 /*ARGSUSED2*/
2990 static void
2991 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2992     struct compound_state *cs)
2993 {
2994         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
2995         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
2996         vnode_t         *avp = NULL;
2997         int             lookup_flags = LOOKUP_XATTR, error;
2998         int             exp_ro = 0;
2999 
3000         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3001             OPENATTR4args *, args);
3002 
3003         if (cs->vp == NULL) {
3004                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3005                 goto out;
3006         }
3007 
3008         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3009             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3010                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3011                 goto out;
3012         }
3013 
3014         /*
3015          * If file system supports passing ACE mask to VOP_ACCESS then
3016          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3017          */
3018 
3019         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3020                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3021                     V_ACE_MASK, cs->cr, NULL);
3022         else
3023                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3024                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3025                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3026 
3027         if (error) {
3028                 *cs->statusp = resp->status = puterrno4(EACCES);
3029                 goto out;
3030         }
3031 
3032         /*
3033          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3034          * the file system is exported read-only -- regardless of
3035          * createdir flag.  Otherwise the attrdir would be created
3036          * (assuming server fs isn't mounted readonly locally).  If
3037          * VOP_LOOKUP returns ENOENT in this case, the error will
3038          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3039          * because specfs has no VOP_LOOKUP op, so the macro would
3040          * return ENOSYS.  EINVAL is returned by all (current)
3041          * Solaris file system implementations when any of their
3042          * restrictions are violated (xattr(dir) can't have xattrdir).
3043          * Returning NOTSUPP is more appropriate in this case
3044          * because the object will never be able to have an attrdir.
3045          */
3046         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3047                 lookup_flags |= CREATE_XATTR_DIR;
3048 
3049         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3050             NULL, NULL, NULL);
3051 
3052         if (error) {
3053                 if (error == ENOENT && args->createdir && exp_ro)
3054                         *cs->statusp = resp->status = puterrno4(EROFS);
3055                 else if (error == EINVAL || error == ENOSYS)
3056                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3057                 else
3058                         *cs->statusp = resp->status = puterrno4(error);
3059                 goto out;
3060         }
3061 
3062         ASSERT(avp->v_flag & V_XATTRDIR);
3063 
3064         error = makefh4(&cs->fh, avp, cs->exi);
3065 
3066         if (error) {
3067                 VN_RELE(avp);
3068                 *cs->statusp = resp->status = puterrno4(error);
3069                 goto out;
3070         }
3071 
3072         VN_RELE(cs->vp);
3073         cs->vp = avp;
3074 
3075         /*
3076          * There is no requirement for an attrdir fh flag
3077          * because the attrdir has a vnode flag to distinguish
3078          * it from regular (non-xattr) directories.  The
3079          * FH4_ATTRDIR flag is set for future sanity checks.
3080          */
3081         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3082         *cs->statusp = resp->status = NFS4_OK;
3083 
3084 out:
3085         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3086             OPENATTR4res *, resp);
3087 }
3088 
3089 static int
3090 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3091     caller_context_t *ct)
3092 {
3093         int error;
3094         int i;
3095         clock_t delaytime;
3096 
3097         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3098 
3099         /*
3100          * Don't block on mandatory locks. If this routine returns
3101          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3102          */
3103         uio->uio_fmode = FNONBLOCK;
3104 
3105         for (i = 0; i < rfs4_maxlock_tries; i++) {
3106 
3107 
3108                 if (direction == FREAD) {
3109                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3110                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3111                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3112                 } else {
3113                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3114                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3115                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3116                 }
3117 
3118                 if (error != EAGAIN)
3119                         break;
3120 
3121                 if (i < rfs4_maxlock_tries - 1) {
3122                         delay(delaytime);
3123                         delaytime *= 2;
3124                 }
3125         }
3126 
3127         return (error);
3128 }
3129 
3130 /* ARGSUSED */
3131 static void
3132 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3133     struct compound_state *cs)
3134 {
3135         READ4args *args = &argop->nfs_argop4_u.opread;
3136         READ4res *resp = &resop->nfs_resop4_u.opread;
3137         int error;
3138         int verror;
3139         vnode_t *vp;
3140         struct vattr va;
3141         struct iovec iov, *iovp = NULL;
3142         int iovcnt;
3143         struct uio uio;
3144         u_offset_t offset;
3145         bool_t *deleg = &cs->deleg;
3146         nfsstat4 stat;
3147         int in_crit = 0;
3148         mblk_t *mp = NULL;
3149         int alloc_err = 0;
3150         int rdma_used = 0;
3151         int loaned_buffers;
3152         caller_context_t ct;
3153         struct uio *uiop;
3154 
3155         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3156             READ4args, args);
3157 
3158         vp = cs->vp;
3159         if (vp == NULL) {
3160                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3161                 goto out;
3162         }
3163         if (cs->access == CS_ACCESS_DENIED) {
3164                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3165                 goto out;
3166         }
3167 
3168         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3169             deleg, TRUE, &ct)) != NFS4_OK) {
3170                 *cs->statusp = resp->status = stat;
3171                 goto out;
3172         }
3173 
3174         /*
3175          * Enter the critical region before calling VOP_RWLOCK
3176          * to avoid a deadlock with write requests.
3177          */
3178         if (nbl_need_check(vp)) {
3179                 nbl_start_crit(vp, RW_READER);
3180                 in_crit = 1;
3181                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3182                     &ct)) {
3183                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3184                         goto out;
3185                 }
3186         }
3187 
3188         if (args->wlist) {
3189                 if (args->count > clist_len(args->wlist)) {
3190                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3191                         goto out;
3192                 }
3193                 rdma_used = 1;
3194         }
3195 
3196         /* use loaned buffers for TCP */
3197         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3198 
3199         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3200         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3201 
3202         /*
3203          * If we can't get the attributes, then we can't do the
3204          * right access checking.  So, we'll fail the request.
3205          */
3206         if (verror) {
3207                 *cs->statusp = resp->status = puterrno4(verror);
3208                 goto out;
3209         }
3210 
3211         if (vp->v_type != VREG) {
3212                 *cs->statusp = resp->status =
3213                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3214                 goto out;
3215         }
3216 
3217         if (crgetuid(cs->cr) != va.va_uid &&
3218             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3219             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3220                 *cs->statusp = resp->status = puterrno4(error);
3221                 goto out;
3222         }
3223 
3224         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3225                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3226                 goto out;
3227         }
3228 
3229         offset = args->offset;
3230         if (offset >= va.va_size) {
3231                 *cs->statusp = resp->status = NFS4_OK;
3232                 resp->eof = TRUE;
3233                 resp->data_len = 0;
3234                 resp->data_val = NULL;
3235                 resp->mblk = NULL;
3236                 /* RDMA */
3237                 resp->wlist = args->wlist;
3238                 resp->wlist_len = resp->data_len;
3239                 *cs->statusp = resp->status = NFS4_OK;
3240                 if (resp->wlist)
3241                         clist_zero_len(resp->wlist);
3242                 goto out;
3243         }
3244 
3245         if (args->count == 0) {
3246                 *cs->statusp = resp->status = NFS4_OK;
3247                 resp->eof = FALSE;
3248                 resp->data_len = 0;
3249                 resp->data_val = NULL;
3250                 resp->mblk = NULL;
3251                 /* RDMA */
3252                 resp->wlist = args->wlist;
3253                 resp->wlist_len = resp->data_len;
3254                 if (resp->wlist)
3255                         clist_zero_len(resp->wlist);
3256                 goto out;
3257         }
3258 
3259         /*
3260          * Do not allocate memory more than maximum allowed
3261          * transfer size
3262          */
3263         if (args->count > rfs4_tsize(req))
3264                 args->count = rfs4_tsize(req);
3265 
3266         if (loaned_buffers) {
3267                 uiop = (uio_t *)rfs_setup_xuio(vp);
3268                 ASSERT(uiop != NULL);
3269                 uiop->uio_segflg = UIO_SYSSPACE;
3270                 uiop->uio_loffset = args->offset;
3271                 uiop->uio_resid = args->count;
3272 
3273                 /* Jump to do the read if successful */
3274                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3275                         /*
3276                          * Need to hold the vnode until after VOP_RETZCBUF()
3277                          * is called.
3278                          */
3279                         VN_HOLD(vp);
3280                         goto doio_read;
3281                 }
3282 
3283                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3284                     uiop->uio_loffset, int, uiop->uio_resid);
3285 
3286                 uiop->uio_extflg = 0;
3287 
3288                 /* failure to setup for zero copy */
3289                 rfs_free_xuio((void *)uiop);
3290                 loaned_buffers = 0;
3291         }
3292 
3293         /*
3294          * If returning data via RDMA Write, then grab the chunk list. If we
3295          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3296          */
3297         if (rdma_used) {
3298                 mp = NULL;
3299                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3300                 uio.uio_iov = &iov;
3301                 uio.uio_iovcnt = 1;
3302         } else {
3303                 /*
3304                  * mp will contain the data to be sent out in the read reply.
3305                  * It will be freed after the reply has been sent.
3306                  */
3307                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3308                 ASSERT(mp != NULL);
3309                 ASSERT(alloc_err == 0);
3310                 uio.uio_iov = iovp;
3311                 uio.uio_iovcnt = iovcnt;
3312         }
3313 
3314         uio.uio_segflg = UIO_SYSSPACE;
3315         uio.uio_extflg = UIO_COPY_CACHED;
3316         uio.uio_loffset = args->offset;
3317         uio.uio_resid = args->count;
3318         uiop = &uio;
3319 
3320 doio_read:
3321         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3322 
3323         va.va_mask = AT_SIZE;
3324         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3325 
3326         if (error) {
3327                 if (mp)
3328                         freemsg(mp);
3329                 *cs->statusp = resp->status = puterrno4(error);
3330                 goto out;
3331         }
3332 
3333         /* make mblk using zc buffers */
3334         if (loaned_buffers) {
3335                 mp = uio_to_mblk(uiop);
3336                 ASSERT(mp != NULL);
3337         }
3338 
3339         *cs->statusp = resp->status = NFS4_OK;
3340 
3341         ASSERT(uiop->uio_resid >= 0);
3342         resp->data_len = args->count - uiop->uio_resid;
3343         if (mp) {
3344                 resp->data_val = (char *)mp->b_datap->db_base;
3345                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3346         } else {
3347                 resp->data_val = (caddr_t)iov.iov_base;
3348         }
3349 
3350         resp->mblk = mp;
3351 
3352         if (!verror && offset + resp->data_len == va.va_size)
3353                 resp->eof = TRUE;
3354         else
3355                 resp->eof = FALSE;
3356 
3357         if (rdma_used) {
3358                 if (!rdma_setup_read_data4(args, resp)) {
3359                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3360                 }
3361         } else {
3362                 resp->wlist = NULL;
3363         }
3364 
3365 out:
3366         if (in_crit)
3367                 nbl_end_crit(vp);
3368 
3369         if (iovp != NULL)
3370                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3371 
3372         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3373             READ4res *, resp);
3374 }
3375 
3376 static void
3377 rfs4_op_read_free(nfs_resop4 *resop)
3378 {
3379         READ4res        *resp = &resop->nfs_resop4_u.opread;
3380 
3381         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3382                 freemsg(resp->mblk);
3383                 resp->mblk = NULL;
3384                 resp->data_val = NULL;
3385                 resp->data_len = 0;
3386         }
3387 }
3388 
3389 static void
3390 rfs4_op_readdir_free(nfs_resop4 * resop)
3391 {
3392         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3393 
3394         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3395                 freeb(resp->mblk);
3396                 resp->mblk = NULL;
3397                 resp->data_len = 0;
3398         }
3399 }
3400 
3401 
3402 /* ARGSUSED */
3403 static void
3404 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405     struct compound_state *cs)
3406 {
3407         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3408         int             error;
3409         vnode_t         *vp;
3410         struct exportinfo *exi, *sav_exi;
3411         nfs_fh4_fmt_t   *fh_fmtp;
3412 
3413         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414 
3415         if (cs->vp) {
3416                 VN_RELE(cs->vp);
3417                 cs->vp = NULL;
3418         }
3419 
3420         if (cs->cr)
3421                 crfree(cs->cr);
3422 
3423         cs->cr = crdup(cs->basecr);
3424 
3425         vp = exi_public->exi_vp;
3426         if (vp == NULL) {
3427                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428                 goto out;
3429         }
3430 
3431         error = makefh4(&cs->fh, vp, exi_public);
3432         if (error != 0) {
3433                 *cs->statusp = resp->status = puterrno4(error);
3434                 goto out;
3435         }
3436         sav_exi = cs->exi;
3437         if (exi_public == exi_root) {
3438                 /*
3439                  * No filesystem is actually shared public, so we default
3440                  * to exi_root. In this case, we must check whether root
3441                  * is exported.
3442                  */
3443                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444 
3445                 /*
3446                  * if root filesystem is exported, the exportinfo struct that we
3447                  * should use is what checkexport4 returns, because root_exi is
3448                  * actually a mostly empty struct.
3449                  */
3450                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3451                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452                 cs->exi = ((exi != NULL) ? exi : exi_public);
3453         } else {
3454                 /*
3455                  * it's a properly shared filesystem
3456                  */
3457                 cs->exi = exi_public;
3458         }
3459 
3460         if (is_system_labeled()) {
3461                 bslabel_t *clabel;
3462 
3463                 ASSERT(req->rq_label != NULL);
3464                 clabel = req->rq_label;
3465                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466                     "got client label from request(1)",
3467                     struct svc_req *, req);
3468                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3469                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3470                             cs->exi)) {
3471                                 *cs->statusp = resp->status =
3472                                     NFS4ERR_SERVERFAULT;
3473                                 goto out;
3474                         }
3475                 }
3476         }
3477 
3478         VN_HOLD(vp);
3479         cs->vp = vp;
3480 
3481         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3482                 VN_RELE(cs->vp);
3483                 cs->vp = NULL;
3484                 cs->exi = sav_exi;
3485                 goto out;
3486         }
3487 
3488         *cs->statusp = resp->status = NFS4_OK;
3489 out:
3490         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3491             PUTPUBFH4res *, resp);
3492 }
3493 
3494 /*
3495  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3496  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3497  * or joe have restrictive search permissions, then we shouldn't let
3498  * the client get a file handle. This is easy to enforce. However, we
3499  * don't know what security flavor should be used until we resolve the
3500  * path name. Another complication is uid mapping. If root is
3501  * the user, then it will be mapped to the anonymous user by default,
3502  * but we won't know that till we've resolved the path name. And we won't
3503  * know what the anonymous user is.
3504  * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that flavor of
3506  * the target object matches that of the request, and if root was the
3507  * caller, check for the root= and anon= options, and if necessary,
3508  * repeat the lookup using the right cred_t. But that's not done yet.
3509  */
3510 /* ARGSUSED */
3511 static void
3512 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3513     struct compound_state *cs)
3514 {
3515         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3516         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3517         nfs_fh4_fmt_t *fh_fmtp;
3518 
3519         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3520             PUTFH4args *, args);
3521 
3522         if (cs->vp) {
3523                 VN_RELE(cs->vp);
3524                 cs->vp = NULL;
3525         }
3526 
3527         if (cs->cr) {
3528                 crfree(cs->cr);
3529                 cs->cr = NULL;
3530         }
3531 
3532 
3533         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3534                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3535                 goto out;
3536         }
3537 
3538         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3539         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3540             NULL);
3541 
3542         if (cs->exi == NULL) {
3543                 *cs->statusp = resp->status = NFS4ERR_STALE;
3544                 goto out;
3545         }
3546 
3547         cs->cr = crdup(cs->basecr);
3548 
3549         ASSERT(cs->cr != NULL);
3550 
3551         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3552                 *cs->statusp = resp->status;
3553                 goto out;
3554         }
3555 
3556         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3557                 VN_RELE(cs->vp);
3558                 cs->vp = NULL;
3559                 goto out;
3560         }
3561 
3562         nfs_fh4_copy(&args->object, &cs->fh);
3563         *cs->statusp = resp->status = NFS4_OK;
3564         cs->deleg = FALSE;
3565 
3566 out:
3567         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3568             PUTFH4res *, resp);
3569 }
3570 
3571 /* ARGSUSED */
3572 static void
3573 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3574     struct compound_state *cs)
3575 {
3576         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3577         int error;
3578         fid_t fid;
3579         struct exportinfo *exi, *sav_exi;
3580 
3581         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3582 
3583         if (cs->vp) {
3584                 VN_RELE(cs->vp);
3585                 cs->vp = NULL;
3586         }
3587 
3588         if (cs->cr)
3589                 crfree(cs->cr);
3590 
3591         cs->cr = crdup(cs->basecr);
3592 
3593         /*
3594          * Using rootdir, the system root vnode,
3595          * get its fid.
3596          */
3597         bzero(&fid, sizeof (fid));
3598         fid.fid_len = MAXFIDSZ;
3599         error = vop_fid_pseudo(rootdir, &fid);
3600         if (error != 0) {
3601                 *cs->statusp = resp->status = puterrno4(error);
3602                 goto out;
3603         }
3604 
3605         /*
3606          * Then use the root fsid & fid it to find out if it's exported
3607          *
3608          * If the server root isn't exported directly, then
3609          * it should at least be a pseudo export based on
3610          * one or more exports further down in the server's
3611          * file tree.
3612          */
3613         exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3614         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615                 NFS4_DEBUG(rfs4_debug,
3616                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618                 goto out;
3619         }
3620 
3621         /*
3622          * Now make a filehandle based on the root
3623          * export and root vnode.
3624          */
3625         error = makefh4(&cs->fh, rootdir, exi);
3626         if (error != 0) {
3627                 *cs->statusp = resp->status = puterrno4(error);
3628                 goto out;
3629         }
3630 
3631         sav_exi = cs->exi;
3632         cs->exi = exi;
3633 
3634         VN_HOLD(rootdir);
3635         cs->vp = rootdir;
3636 
3637         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638                 VN_RELE(rootdir);
3639                 cs->vp = NULL;
3640                 cs->exi = sav_exi;
3641                 goto out;
3642         }
3643 
3644         *cs->statusp = resp->status = NFS4_OK;
3645         cs->deleg = FALSE;
3646 out:
3647         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648             PUTROOTFH4res *, resp);
3649 }
3650 
3651 /*
3652  * readlink: args: CURRENT_FH.
3653  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3654  */
3655 
3656 /* ARGSUSED */
3657 static void
3658 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3659     struct compound_state *cs)
3660 {
3661         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3662         int error;
3663         vnode_t *vp;
3664         struct iovec iov;
3665         struct vattr va;
3666         struct uio uio;
3667         char *data;
3668         struct sockaddr *ca;
3669         char *name = NULL;
3670         int is_referral;
3671 
3672         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3673 
3674         /* CURRENT_FH: directory */
3675         vp = cs->vp;
3676         if (vp == NULL) {
3677                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3678                 goto out;
3679         }
3680 
3681         if (cs->access == CS_ACCESS_DENIED) {
3682                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3683                 goto out;
3684         }
3685 
3686         /* Is it a referral? */
3687         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3688 
3689                 is_referral = 1;
3690 
3691         } else {
3692 
3693                 is_referral = 0;
3694 
3695                 if (vp->v_type == VDIR) {
3696                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3697                         goto out;
3698                 }
3699 
3700                 if (vp->v_type != VLNK) {
3701                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3702                         goto out;
3703                 }
3704 
3705         }
3706 
3707         va.va_mask = AT_MODE;
3708         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3709         if (error) {
3710                 *cs->statusp = resp->status = puterrno4(error);
3711                 goto out;
3712         }
3713 
3714         if (MANDLOCK(vp, va.va_mode)) {
3715                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3716                 goto out;
3717         }
3718 
3719         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3720 
3721         if (is_referral) {
3722                 char *s;
3723                 size_t strsz;
3724 
3725                 /* Get an artificial symlink based on a referral */
3726                 s = build_symlink(vp, cs->cr, &strsz);
3727                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3728                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3729                     vnode_t *, vp, char *, s);
3730                 if (s == NULL)
3731                         error = EINVAL;
3732                 else {
3733                         error = 0;
3734                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3735                         kmem_free(s, strsz);
3736                 }
3737 
3738         } else {
3739 
3740                 iov.iov_base = data;
3741                 iov.iov_len = MAXPATHLEN;
3742                 uio.uio_iov = &iov;
3743                 uio.uio_iovcnt = 1;
3744                 uio.uio_segflg = UIO_SYSSPACE;
3745                 uio.uio_extflg = UIO_COPY_CACHED;
3746                 uio.uio_loffset = 0;
3747                 uio.uio_resid = MAXPATHLEN;
3748 
3749                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3750 
3751                 if (!error)
3752                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3753         }
3754 
3755         if (error) {
3756                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3757                 *cs->statusp = resp->status = puterrno4(error);
3758                 goto out;
3759         }
3760 
3761         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3762         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3763             MAXPATHLEN  + 1);
3764 
3765         if (name == NULL) {
3766                 /*
3767                  * Even though the conversion failed, we return
3768                  * something. We just don't translate it.
3769                  */
3770                 name = data;
3771         }
3772 
3773         /*
3774          * treat link name as data
3775          */
3776         (void) str_to_utf8(name, (utf8string *)&resp->link);
3777 
3778         if (name != data)
3779                 kmem_free(name, MAXPATHLEN + 1);
3780         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3781         *cs->statusp = resp->status = NFS4_OK;
3782 
3783 out:
3784         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3785             READLINK4res *, resp);
3786 }
3787 
3788 static void
3789 rfs4_op_readlink_free(nfs_resop4 *resop)
3790 {
3791         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3792         utf8string *symlink = (utf8string *)&resp->link;
3793 
3794         if (symlink->utf8string_val) {
3795                 UTF8STRING_FREE(*symlink)
3796         }
3797 }
3798 
3799 /*
3800  * release_lockowner:
3801  *      Release any state associated with the supplied
3802  *      lockowner. Note if any lo_state is holding locks we will not
3803  *      rele that lo_state and thus the lockowner will not be destroyed.
3804  *      A client using lock after the lock owner stateid has been released
3805  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3806  *      to reissue the lock with new_lock_owner set to TRUE.
3807  *      args: lock_owner
3808  *      res:  status
3809  */
3810 /* ARGSUSED */
3811 static void
3812 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3813     struct svc_req *req, struct compound_state *cs)
3814 {
3815         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3816         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3817         rfs4_lockowner_t *lo;
3818         rfs4_openowner_t *oo;
3819         rfs4_state_t *sp;
3820         rfs4_lo_state_t *lsp;
3821         rfs4_client_t *cp;
3822         bool_t create = FALSE;
3823         locklist_t *llist;
3824         sysid_t sysid;
3825 
3826         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3827             cs, RELEASE_LOCKOWNER4args *, ap);
3828 
3829         /* Make sure there is a clientid around for this request */
3830         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3831 
3832         if (cp == NULL) {
3833                 *cs->statusp = resp->status =
3834                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3835                 goto out;
3836         }
3837         rfs4_client_rele(cp);
3838 
3839         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3840         if (lo == NULL) {
3841                 *cs->statusp = resp->status = NFS4_OK;
3842                 goto out;
3843         }
3844         ASSERT(lo->rl_client != NULL);
3845 
3846         /*
3847          * Check for EXPIRED client. If so will reap state with in a lease
3848          * period or on next set_clientid_confirm step
3849          */
3850         if (rfs4_lease_expired(lo->rl_client)) {
3851                 rfs4_lockowner_rele(lo);
3852                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3853                 goto out;
3854         }
3855 
3856         /*
3857          * If no sysid has been assigned, then no locks exist; just return.
3858          */
3859         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3860         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3861                 rfs4_lockowner_rele(lo);
3862                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3863                 goto out;
3864         }
3865 
3866         sysid = lo->rl_client->rc_sysidt;
3867         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3868 
3869         /*
3870          * Mark the lockowner invalid.
3871          */
3872         rfs4_dbe_hide(lo->rl_dbe);
3873 
3874         /*
3875          * sysid-pid pair should now not be used since the lockowner is
3876          * invalid. If the client were to instantiate the lockowner again
3877          * it would be assigned a new pid. Thus we can get the list of
3878          * current locks.
3879          */
3880 
3881         llist = flk_get_active_locks(sysid, lo->rl_pid);
3882         /* If we are still holding locks fail */
3883         if (llist != NULL) {
3884 
3885                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3886 
3887                 flk_free_locklist(llist);
3888                 /*
3889                  * We need to unhide the lockowner so the client can
3890                  * try it again. The bad thing here is if the client
3891                  * has a logic error that took it here in the first place
3892                  * they probably have lost accounting of the locks that it
3893                  * is holding. So we may have dangling state until the
3894                  * open owner state is reaped via close. One scenario
3895                  * that could possibly occur is that the client has
3896                  * sent the unlock request(s) in separate threads
3897                  * and has not waited for the replies before sending the
3898                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3899                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3900                  * reissuing the request.
3901                  */
3902                 rfs4_dbe_unhide(lo->rl_dbe);
3903                 rfs4_lockowner_rele(lo);
3904                 goto out;
3905         }
3906 
3907         /*
3908          * For the corresponding client we need to check each open
3909          * owner for any opens that have lockowner state associated
3910          * with this lockowner.
3911          */
3912 
3913         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3914         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3915             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3916 
3917                 rfs4_dbe_lock(oo->ro_dbe);
3918                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3919                     sp = list_next(&oo->ro_statelist, sp)) {
3920 
3921                         rfs4_dbe_lock(sp->rs_dbe);
3922                         for (lsp = list_head(&sp->rs_lostatelist);
3923                             lsp != NULL;
3924                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
3925                                 if (lsp->rls_locker == lo) {
3926                                         rfs4_dbe_lock(lsp->rls_dbe);
3927                                         rfs4_dbe_invalidate(lsp->rls_dbe);
3928                                         rfs4_dbe_unlock(lsp->rls_dbe);
3929                                 }
3930                         }
3931                         rfs4_dbe_unlock(sp->rs_dbe);
3932                 }
3933                 rfs4_dbe_unlock(oo->ro_dbe);
3934         }
3935         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3936 
3937         rfs4_lockowner_rele(lo);
3938 
3939         *cs->statusp = resp->status = NFS4_OK;
3940 
3941 out:
3942         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
3943             cs, RELEASE_LOCKOWNER4res *, resp);
3944 }
3945 
3946 /*
3947  * short utility function to lookup a file and recall the delegation
3948  */
3949 static rfs4_file_t *
3950 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3951     int *lkup_error, cred_t *cr)
3952 {
3953         vnode_t *vp;
3954         rfs4_file_t *fp = NULL;
3955         bool_t fcreate = FALSE;
3956         int error;
3957 
3958         if (vpp)
3959                 *vpp = NULL;
3960 
3961         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
3962             NULL)) == 0) {
3963                 if (vp->v_type == VREG)
3964                         fp = rfs4_findfile(vp, NULL, &fcreate);
3965                 if (vpp)
3966                         *vpp = vp;
3967                 else
3968                         VN_RELE(vp);
3969         }
3970 
3971         if (lkup_error)
3972                 *lkup_error = error;
3973 
3974         return (fp);
3975 }
3976 
3977 /*
3978  * remove: args: CURRENT_FH: directory; name.
3979  *      res: status. If success - CURRENT_FH unchanged, return change_info
3980  *              for directory.
3981  */
3982 /* ARGSUSED */
3983 static void
3984 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3985     struct compound_state *cs)
3986 {
3987         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3988         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3989         int error;
3990         vnode_t *dvp, *vp;
3991         struct vattr bdva, idva, adva;
3992         char *nm;
3993         uint_t len;
3994         rfs4_file_t *fp;
3995         int in_crit = 0;
3996         bslabel_t *clabel;
3997         struct sockaddr *ca;
3998         char *name = NULL;
3999         nfsstat4 status;
4000 
4001         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4002             REMOVE4args *, args);
4003 
4004         /* CURRENT_FH: directory */
4005         dvp = cs->vp;
4006         if (dvp == NULL) {
4007                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4008                 goto out;
4009         }
4010 
4011         if (cs->access == CS_ACCESS_DENIED) {
4012                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4013                 goto out;
4014         }
4015 
4016         /*
4017          * If there is an unshared filesystem mounted on this vnode,
4018          * Do not allow to remove anything in this directory.
4019          */
4020         if (vn_ismntpt(dvp)) {
4021                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4022                 goto out;
4023         }
4024 
4025         if (dvp->v_type != VDIR) {
4026                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4027                 goto out;
4028         }
4029 
4030         status = utf8_dir_verify(&args->target);
4031         if (status != NFS4_OK) {
4032                 *cs->statusp = resp->status = status;
4033                 goto out;
4034         }
4035 
4036         /*
4037          * Lookup the file so that we can check if it's a directory
4038          */
4039         nm = utf8_to_fn(&args->target, &len, NULL);
4040         if (nm == NULL) {
4041                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4042                 goto out;
4043         }
4044 
4045         if (len > MAXNAMELEN) {
4046                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4047                 kmem_free(nm, len);
4048                 goto out;
4049         }
4050 
4051         if (rdonly4(req, cs)) {
4052                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4053                 kmem_free(nm, len);
4054                 goto out;
4055         }
4056 
4057         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4058         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4059             MAXPATHLEN  + 1);
4060 
4061         if (name == NULL) {
4062                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4063                 kmem_free(nm, len);
4064                 goto out;
4065         }
4066 
4067         /*
4068          * Lookup the file to determine type and while we are see if
4069          * there is a file struct around and check for delegation.
4070          * We don't need to acquire va_seq before this lookup, if
4071          * it causes an update, cinfo.before will not match, which will
4072          * trigger a cache flush even if atomic is TRUE.
4073          */
4074         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4075                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4076                     NULL)) {
4077                         VN_RELE(vp);
4078                         rfs4_file_rele(fp);
4079                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4080                         if (nm != name)
4081                                 kmem_free(name, MAXPATHLEN + 1);
4082                         kmem_free(nm, len);
4083                         goto out;
4084                 }
4085         }
4086 
4087         /* Didn't find anything to remove */
4088         if (vp == NULL) {
4089                 *cs->statusp = resp->status = error;
4090                 if (nm != name)
4091                         kmem_free(name, MAXPATHLEN + 1);
4092                 kmem_free(nm, len);
4093                 goto out;
4094         }
4095 
4096         if (nbl_need_check(vp)) {
4097                 nbl_start_crit(vp, RW_READER);
4098                 in_crit = 1;
4099                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4100                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4101                         if (nm != name)
4102                                 kmem_free(name, MAXPATHLEN + 1);
4103                         kmem_free(nm, len);
4104                         nbl_end_crit(vp);
4105                         VN_RELE(vp);
4106                         if (fp) {
4107                                 rfs4_clear_dont_grant(fp);
4108                                 rfs4_file_rele(fp);
4109                         }
4110                         goto out;
4111                 }
4112         }
4113 
4114         /* check label before allowing removal */
4115         if (is_system_labeled()) {
4116                 ASSERT(req->rq_label != NULL);
4117                 clabel = req->rq_label;
4118                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4119                     "got client label from request(1)",
4120                     struct svc_req *, req);
4121                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4122                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4123                             cs->exi)) {
4124                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4125                                 if (name != nm)
4126                                         kmem_free(name, MAXPATHLEN + 1);
4127                                 kmem_free(nm, len);
4128                                 if (in_crit)
4129                                         nbl_end_crit(vp);
4130                                 VN_RELE(vp);
4131                                 if (fp) {
4132                                         rfs4_clear_dont_grant(fp);
4133                                         rfs4_file_rele(fp);
4134                                 }
4135                                 goto out;
4136                         }
4137                 }
4138         }
4139 
4140         /* Get dir "before" change value */
4141         bdva.va_mask = AT_CTIME|AT_SEQ;
4142         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4143         if (error) {
4144                 *cs->statusp = resp->status = puterrno4(error);
4145                 if (nm != name)
4146                         kmem_free(name, MAXPATHLEN + 1);
4147                 kmem_free(nm, len);
4148                 if (in_crit)
4149                         nbl_end_crit(vp);
4150                 VN_RELE(vp);
4151                 if (fp) {
4152                         rfs4_clear_dont_grant(fp);
4153                         rfs4_file_rele(fp);
4154                 }
4155                 goto out;
4156         }
4157         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4158 
4159         /* Actually do the REMOVE operation */
4160         if (vp->v_type == VDIR) {
4161                 /*
4162                  * Can't remove a directory that has a mounted-on filesystem.
4163                  */
4164                 if (vn_ismntpt(vp)) {
4165                         error = EACCES;
4166                 } else {
4167                         /*
4168                          * System V defines rmdir to return EEXIST,
4169                          * not ENOTEMPTY, if the directory is not
4170                          * empty.  A System V NFS server needs to map
4171                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4172                          * transmit over the wire.
4173                          */
4174                         if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4175                             NULL, 0)) == EEXIST)
4176                                 error = ENOTEMPTY;
4177                 }
4178         } else {
4179                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4180                     fp != NULL) {
4181                         struct vattr va;
4182                         vnode_t *tvp;
4183 
4184                         rfs4_dbe_lock(fp->rf_dbe);
4185                         tvp = fp->rf_vp;
4186                         if (tvp)
4187                                 VN_HOLD(tvp);
4188                         rfs4_dbe_unlock(fp->rf_dbe);
4189 
4190                         if (tvp) {
4191                                 /*
4192                                  * This is va_seq safe because we are not
4193                                  * manipulating dvp.
4194                                  */
4195                                 va.va_mask = AT_NLINK;
4196                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4197                                     va.va_nlink == 0) {
4198                                         /* Remove state on file remove */
4199                                         if (in_crit) {
4200                                                 nbl_end_crit(vp);
4201                                                 in_crit = 0;
4202                                         }
4203                                         rfs4_close_all_state(fp);
4204                                 }
4205                                 VN_RELE(tvp);
4206                         }
4207                 }
4208         }
4209 
4210         if (in_crit)
4211                 nbl_end_crit(vp);
4212         VN_RELE(vp);
4213 
4214         if (fp) {
4215                 rfs4_clear_dont_grant(fp);
4216                 rfs4_file_rele(fp);
4217         }
4218         if (nm != name)
4219                 kmem_free(name, MAXPATHLEN + 1);
4220         kmem_free(nm, len);
4221 
4222         if (error) {
4223                 *cs->statusp = resp->status = puterrno4(error);
4224                 goto out;
4225         }
4226 
4227         /*
4228          * Get the initial "after" sequence number, if it fails, set to zero
4229          */
4230         idva.va_mask = AT_SEQ;
4231         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4232                 idva.va_seq = 0;
4233 
4234         /*
4235          * Force modified data and metadata out to stable storage.
4236          */
4237         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4238 
4239         /*
4240          * Get "after" change value, if it fails, simply return the
4241          * before value.
4242          */
4243         adva.va_mask = AT_CTIME|AT_SEQ;
4244         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4245                 adva.va_ctime = bdva.va_ctime;
4246                 adva.va_seq = 0;
4247         }
4248 
4249         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4250 
4251         /*
4252          * The cinfo.atomic = TRUE only if we have
4253          * non-zero va_seq's, and it has incremented by exactly one
4254          * during the VOP_REMOVE/RMDIR and it didn't change during
4255          * the VOP_FSYNC.
4256          */
4257         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4258             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4259                 resp->cinfo.atomic = TRUE;
4260         else
4261                 resp->cinfo.atomic = FALSE;
4262 
4263         *cs->statusp = resp->status = NFS4_OK;
4264 
4265 out:
4266         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4267             REMOVE4res *, resp);
4268 }
4269 
4270 /*
4271  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4272  *              oldname and newname.
4273  *      res: status. If success - CURRENT_FH unchanged, return change_info
4274  *              for both from and target directories.
4275  */
4276 /* ARGSUSED */
4277 static void
4278 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4279     struct compound_state *cs)
4280 {
4281         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4282         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4283         int error;
4284         vnode_t *odvp;
4285         vnode_t *ndvp;
4286         vnode_t *srcvp, *targvp;
4287         struct vattr obdva, oidva, oadva;
4288         struct vattr nbdva, nidva, nadva;
4289         char *onm, *nnm;
4290         uint_t olen, nlen;
4291         rfs4_file_t *fp, *sfp;
4292         int in_crit_src, in_crit_targ;
4293         int fp_rele_grant_hold, sfp_rele_grant_hold;
4294         bslabel_t *clabel;
4295         struct sockaddr *ca;
4296         char *converted_onm = NULL;
4297         char *converted_nnm = NULL;
4298         nfsstat4 status;
4299 
4300         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4301             RENAME4args *, args);
4302 
4303         fp = sfp = NULL;
4304         srcvp = targvp = NULL;
4305         in_crit_src = in_crit_targ = 0;
4306         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4307 
4308         /* CURRENT_FH: target directory */
4309         ndvp = cs->vp;
4310         if (ndvp == NULL) {
4311                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4312                 goto out;
4313         }
4314 
4315         /* SAVED_FH: from directory */
4316         odvp = cs->saved_vp;
4317         if (odvp == NULL) {
4318                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4319                 goto out;
4320         }
4321 
4322         if (cs->access == CS_ACCESS_DENIED) {
4323                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4324                 goto out;
4325         }
4326 
4327         /*
4328          * If there is an unshared filesystem mounted on this vnode,
4329          * do not allow to rename objects in this directory.
4330          */
4331         if (vn_ismntpt(odvp)) {
4332                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4333                 goto out;
4334         }
4335 
4336         /*
4337          * If there is an unshared filesystem mounted on this vnode,
4338          * do not allow to rename to this directory.
4339          */
4340         if (vn_ismntpt(ndvp)) {
4341                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4342                 goto out;
4343         }
4344 
4345         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4346                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4347                 goto out;
4348         }
4349 
4350         if (cs->saved_exi != cs->exi) {
4351                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4352                 goto out;
4353         }
4354 
4355         status = utf8_dir_verify(&args->oldname);
4356         if (status != NFS4_OK) {
4357                 *cs->statusp = resp->status = status;
4358                 goto out;
4359         }
4360 
4361         status = utf8_dir_verify(&args->newname);
4362         if (status != NFS4_OK) {
4363                 *cs->statusp = resp->status = status;
4364                 goto out;
4365         }
4366 
4367         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4368         if (onm == NULL) {
4369                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4370                 goto out;
4371         }
4372         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4373         nlen = MAXPATHLEN + 1;
4374         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4375             nlen);
4376 
4377         if (converted_onm == NULL) {
4378                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4379                 kmem_free(onm, olen);
4380                 goto out;
4381         }
4382 
4383         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4384         if (nnm == NULL) {
4385                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4386                 if (onm != converted_onm)
4387                         kmem_free(converted_onm, MAXPATHLEN + 1);
4388                 kmem_free(onm, olen);
4389                 goto out;
4390         }
4391         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4392             MAXPATHLEN  + 1);
4393 
4394         if (converted_nnm == NULL) {
4395                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4396                 kmem_free(nnm, nlen);
4397                 nnm = NULL;
4398                 if (onm != converted_onm)
4399                         kmem_free(converted_onm, MAXPATHLEN + 1);
4400                 kmem_free(onm, olen);
4401                 goto out;
4402         }
4403 
4404 
4405         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4406                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4407                 kmem_free(onm, olen);
4408                 kmem_free(nnm, nlen);
4409                 goto out;
4410         }
4411 
4412 
4413         if (rdonly4(req, cs)) {
4414                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4415                 if (onm != converted_onm)
4416                         kmem_free(converted_onm, MAXPATHLEN + 1);
4417                 kmem_free(onm, olen);
4418                 if (nnm != converted_nnm)
4419                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4420                 kmem_free(nnm, nlen);
4421                 goto out;
4422         }
4423 
4424         /* check label of the target dir */
4425         if (is_system_labeled()) {
4426                 ASSERT(req->rq_label != NULL);
4427                 clabel = req->rq_label;
4428                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4429                     "got client label from request(1)",
4430                     struct svc_req *, req);
4431                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4432                         if (!do_rfs_label_check(clabel, ndvp,
4433                             EQUALITY_CHECK, cs->exi)) {
4434                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4435                                 goto err_out;
4436                         }
4437                 }
4438         }
4439 
4440         /*
4441          * Is the source a file and have a delegation?
4442          * We don't need to acquire va_seq before these lookups, if
4443          * it causes an update, cinfo.before will not match, which will
4444          * trigger a cache flush even if atomic is TRUE.
4445          */
4446         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4447             &error, cs->cr)) {
4448                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4449                     NULL)) {
4450                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4451                         goto err_out;
4452                 }
4453         }
4454 
4455         if (srcvp == NULL) {
4456                 *cs->statusp = resp->status = puterrno4(error);
4457                 if (onm != converted_onm)
4458                         kmem_free(converted_onm, MAXPATHLEN + 1);
4459                 kmem_free(onm, olen);
4460                 if (nnm != converted_nnm)
4461                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4462                 kmem_free(nnm, nlen);
4463                 goto out;
4464         }
4465 
4466         sfp_rele_grant_hold = 1;
4467 
4468         /* Does the destination exist and a file and have a delegation? */
4469         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4470             NULL, cs->cr)) {
4471                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4472                     NULL)) {
4473                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4474                         goto err_out;
4475                 }
4476         }
4477         fp_rele_grant_hold = 1;
4478 
4479 
4480         /* Check for NBMAND lock on both source and target */
4481         if (nbl_need_check(srcvp)) {
4482                 nbl_start_crit(srcvp, RW_READER);
4483                 in_crit_src = 1;
4484                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4485                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4486                         goto err_out;
4487                 }
4488         }
4489 
4490         if (targvp && nbl_need_check(targvp)) {
4491                 nbl_start_crit(targvp, RW_READER);
4492                 in_crit_targ = 1;
4493                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4494                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4495                         goto err_out;
4496                 }
4497         }
4498 
4499         /* Get source "before" change value */
4500         obdva.va_mask = AT_CTIME|AT_SEQ;
4501         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4502         if (!error) {
4503                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4504                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4505         }
4506         if (error) {
4507                 *cs->statusp = resp->status = puterrno4(error);
4508                 goto err_out;
4509         }
4510 
4511         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4512         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4513 
4514         if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4515             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4516                 struct vattr va;
4517                 vnode_t *tvp;
4518 
4519                 rfs4_dbe_lock(fp->rf_dbe);
4520                 tvp = fp->rf_vp;
4521                 if (tvp)
4522                         VN_HOLD(tvp);
4523                 rfs4_dbe_unlock(fp->rf_dbe);
4524 
4525                 if (tvp) {
4526                         va.va_mask = AT_NLINK;
4527                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4528                             va.va_nlink == 0) {
4529                                 /* The file is gone and so should the state */
4530                                 if (in_crit_targ) {
4531                                         nbl_end_crit(targvp);
4532                                         in_crit_targ = 0;
4533                                 }
4534                                 rfs4_close_all_state(fp);
4535                         }
4536                         VN_RELE(tvp);
4537                 }
4538         }
4539         if (error == 0)
4540                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4541 
4542         if (in_crit_src)
4543                 nbl_end_crit(srcvp);
4544         if (srcvp)
4545                 VN_RELE(srcvp);
4546         if (in_crit_targ)
4547                 nbl_end_crit(targvp);
4548         if (targvp)
4549                 VN_RELE(targvp);
4550 
4551         if (sfp) {
4552                 rfs4_clear_dont_grant(sfp);
4553                 rfs4_file_rele(sfp);
4554         }
4555         if (fp) {
4556                 rfs4_clear_dont_grant(fp);
4557                 rfs4_file_rele(fp);
4558         }
4559 
4560         if (converted_onm != onm)
4561                 kmem_free(converted_onm, MAXPATHLEN + 1);
4562         kmem_free(onm, olen);
4563         if (converted_nnm != nnm)
4564                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4565         kmem_free(nnm, nlen);
4566 
4567         /*
4568          * Get the initial "after" sequence number, if it fails, set to zero
4569          */
4570         oidva.va_mask = AT_SEQ;
4571         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4572                 oidva.va_seq = 0;
4573 
4574         nidva.va_mask = AT_SEQ;
4575         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4576                 nidva.va_seq = 0;
4577 
4578         /*
4579          * Force modified data and metadata out to stable storage.
4580          */
4581         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4582         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4583 
4584         if (error) {
4585                 *cs->statusp = resp->status = puterrno4(error);
4586                 goto out;
4587         }
4588 
4589         /*
4590          * Get "after" change values, if it fails, simply return the
4591          * before value.
4592          */
4593         oadva.va_mask = AT_CTIME|AT_SEQ;
4594         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4595                 oadva.va_ctime = obdva.va_ctime;
4596                 oadva.va_seq = 0;
4597         }
4598 
4599         nadva.va_mask = AT_CTIME|AT_SEQ;
4600         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4601                 nadva.va_ctime = nbdva.va_ctime;
4602                 nadva.va_seq = 0;
4603         }
4604 
4605         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4606         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4607 
4608         /*
4609          * The cinfo.atomic = TRUE only if we have
4610          * non-zero va_seq's, and it has incremented by exactly one
4611          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4612          */
4613         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4614             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4615                 resp->source_cinfo.atomic = TRUE;
4616         else
4617                 resp->source_cinfo.atomic = FALSE;
4618 
4619         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4620             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4621                 resp->target_cinfo.atomic = TRUE;
4622         else
4623                 resp->target_cinfo.atomic = FALSE;
4624 
4625 #ifdef  VOLATILE_FH_TEST
4626         {
4627         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4628 
4629         /*
4630          * Add the renamed file handle to the volatile rename list
4631          */
4632         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4633                 /* file handles may expire on rename */
4634                 vnode_t *vp;
4635 
4636                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4637                 /*
4638                  * Already know that nnm will be a valid string
4639                  */
4640                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4641                     NULL, NULL, NULL);
4642                 kmem_free(nnm, nlen);
4643                 if (!error) {
4644                         add_volrnm_fh(cs->exi, vp);
4645                         VN_RELE(vp);
4646                 }
4647         }
4648         }
4649 #endif  /* VOLATILE_FH_TEST */
4650 
4651         *cs->statusp = resp->status = NFS4_OK;
4652 out:
4653         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4654             RENAME4res *, resp);
4655         return;
4656 
4657 err_out:
4658         if (onm != converted_onm)
4659                 kmem_free(converted_onm, MAXPATHLEN + 1);
4660         if (onm != NULL)
4661                 kmem_free(onm, olen);
4662         if (nnm != converted_nnm)
4663                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4664         if (nnm != NULL)
4665                 kmem_free(nnm, nlen);
4666 
4667         if (in_crit_src) nbl_end_crit(srcvp);
4668         if (in_crit_targ) nbl_end_crit(targvp);
4669         if (targvp) VN_RELE(targvp);
4670         if (srcvp) VN_RELE(srcvp);
4671         if (sfp) {
4672                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4673                 rfs4_file_rele(sfp);
4674         }
4675         if (fp) {
4676                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4677                 rfs4_file_rele(fp);
4678         }
4679 
4680         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4681             RENAME4res *, resp);
4682 }
4683 
4684 /* ARGSUSED */
4685 static void
4686 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4687     struct compound_state *cs)
4688 {
4689         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4690         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4691         rfs4_client_t *cp;
4692 
4693         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4694             RENEW4args *, args);
4695 
4696         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4697                 *cs->statusp = resp->status =
4698                     rfs4_check_clientid(&args->clientid, 0);
4699                 goto out;
4700         }
4701 
4702         if (rfs4_lease_expired(cp)) {
4703                 rfs4_client_rele(cp);
4704                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4705                 goto out;
4706         }
4707 
4708         rfs4_update_lease(cp);
4709 
4710         mutex_enter(cp->rc_cbinfo.cb_lock);
4711         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4712                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4713                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4714         } else {
4715                 *cs->statusp = resp->status = NFS4_OK;
4716         }
4717         mutex_exit(cp->rc_cbinfo.cb_lock);
4718 
4719         rfs4_client_rele(cp);
4720 
4721 out:
4722         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4723             RENEW4res *, resp);
4724 }
4725 
4726 /* ARGSUSED */
4727 static void
4728 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4729     struct compound_state *cs)
4730 {
4731         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4732 
4733         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4734 
4735         /* No need to check cs->access - we are not accessing any object */
4736         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4737                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4738                 goto out;
4739         }
4740         if (cs->vp != NULL) {
4741                 VN_RELE(cs->vp);
4742         }
4743         cs->vp = cs->saved_vp;
4744         cs->saved_vp = NULL;
4745         cs->exi = cs->saved_exi;
4746         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4747         *cs->statusp = resp->status = NFS4_OK;
4748         cs->deleg = FALSE;
4749 
4750 out:
4751         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4752             RESTOREFH4res *, resp);
4753 }
4754 
4755 /* ARGSUSED */
4756 static void
4757 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4758     struct compound_state *cs)
4759 {
4760         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4761 
4762         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4763 
4764         /* No need to check cs->access - we are not accessing any object */
4765         if (cs->vp == NULL) {
4766                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4767                 goto out;
4768         }
4769         if (cs->saved_vp != NULL) {
4770                 VN_RELE(cs->saved_vp);
4771         }
4772         cs->saved_vp = cs->vp;
4773         VN_HOLD(cs->saved_vp);
4774         cs->saved_exi = cs->exi;
4775         /*
4776          * since SAVEFH is fairly rare, don't alloc space for its fh
4777          * unless necessary.
4778          */
4779         if (cs->saved_fh.nfs_fh4_val == NULL) {
4780                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4781         }
4782         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4783         *cs->statusp = resp->status = NFS4_OK;
4784 
4785 out:
4786         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4787             SAVEFH4res *, resp);
4788 }
4789 
4790 /*
4791  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4792  * return the bitmap of attrs that were set successfully. It is also
4793  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4794  * always be called only after rfs4_do_set_attrs().
4795  *
4796  * Verify that the attributes are same as the expected ones. sargp->vap
4797  * and sargp->sbp contain the input attributes as translated from fattr4.
4798  *
4799  * This function verifies only the attrs that correspond to a vattr or
4800  * vfsstat struct. That is because of the extra step needed to get the
4801  * corresponding system structs. Other attributes have already been set or
4802  * verified by do_rfs4_set_attrs.
4803  *
4804  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4805  */
4806 static int
4807 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4808     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4809 {
4810         int error, ret_error = 0;
4811         int i, k;
4812         uint_t sva_mask = sargp->vap->va_mask;
4813         uint_t vbit;
4814         union nfs4_attr_u *na;
4815         uint8_t *amap;
4816         bool_t getsb = ntovp->vfsstat;
4817 
4818         if (sva_mask != 0) {
4819                 /*
4820                  * Okay to overwrite sargp->vap because we verify based
4821                  * on the incoming values.
4822                  */
4823                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4824                     sargp->cs->cr, NULL);
4825                 if (ret_error) {
4826                         if (resp == NULL)
4827                                 return (ret_error);
4828                         /*
4829                          * Must return bitmap of successful attrs
4830                          */
4831                         sva_mask = 0;   /* to prevent checking vap later */
4832                 } else {
4833                         /*
4834                          * Some file systems clobber va_mask. it is probably
4835                          * wrong of them to do so, nonethless we practice
4836                          * defensive coding.
4837                          * See bug id 4276830.
4838                          */
4839                         sargp->vap->va_mask = sva_mask;
4840                 }
4841         }
4842 
4843         if (getsb) {
4844                 /*
4845                  * Now get the superblock and loop on the bitmap, as there is
4846                  * no simple way of translating from superblock to bitmap4.
4847                  */
4848                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4849                 if (ret_error) {
4850                         if (resp == NULL)
4851                                 goto errout;
4852                         getsb = FALSE;
4853                 }
4854         }
4855 
4856         /*
4857          * Now loop and verify each attribute which getattr returned
4858          * whether it's the same as the input.
4859          */
4860         if (resp == NULL && !getsb && (sva_mask == 0))
4861                 goto errout;
4862 
4863         na = ntovp->na;
4864         amap = ntovp->amap;
4865         k = 0;
4866         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4867                 k = *amap;
4868                 ASSERT(nfs4_ntov_map[k].nval == k);
4869                 vbit = nfs4_ntov_map[k].vbit;
4870 
4871                 /*
4872                  * If vattr attribute but VOP_GETATTR failed, or it's
4873                  * superblock attribute but VFS_STATVFS failed, skip
4874                  */
4875                 if (vbit) {
4876                         if ((vbit & sva_mask) == 0)
4877                                 continue;
4878                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4879                         continue;
4880                 }
4881                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4882                 if (resp != NULL) {
4883                         if (error)
4884                                 ret_error = -1; /* not all match */
4885                         else    /* update response bitmap */
4886                                 *resp |= nfs4_ntov_map[k].fbit;
4887                         continue;
4888                 }
4889                 if (error) {
4890                         ret_error = -1; /* not all match */
4891                         break;
4892                 }
4893         }
4894 errout:
4895         return (ret_error);
4896 }
4897 
4898 /*
4899  * Decode the attribute to be set/verified. If the attr requires a sys op
4900  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4901  * call the sv_getit function for it, because the sys op hasn't yet been done.
4902  * Return 0 for success, error code if failed.
4903  *
4904  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4905  */
4906 static int
4907 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4908     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4909 {
4910         int error = 0;
4911         bool_t set_later;
4912 
4913         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4914 
4915         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4916                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4917                 /*
4918                  * don't verify yet if a vattr or sb dependent attr,
4919                  * because we don't have their sys values yet.
4920                  * Will be done later.
4921                  */
4922                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4923                         /*
4924                          * ACLs are a special case, since setting the MODE
4925                          * conflicts with setting the ACL.  We delay setting
4926                          * the ACL until all other attributes have been set.
4927                          * The ACL gets set in do_rfs4_op_setattr().
4928                          */
4929                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4930                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4931                                     sargp, nap);
4932                                 if (error) {
4933                                         xdr_free(nfs4_ntov_map[k].xfunc,
4934                                             (caddr_t)nap);
4935                                 }
4936                         }
4937                 }
4938         } else {
4939 #ifdef  DEBUG
4940                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4941                     "decoding attribute %d\n", k);
4942 #endif
4943                 error = EINVAL;
4944         }
4945         if (!error && resp_bval && !set_later) {
4946                 *resp_bval |= nfs4_ntov_map[k].fbit;
4947         }
4948 
4949         return (error);
4950 }
4951 
4952 /*
4953  * Set vattr based on incoming fattr4 attrs - used by setattr.
4954  * Set response mask. Ignore any values that are not writable vattr attrs.
4955  */
4956 static nfsstat4
4957 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4958     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4959     nfs4_attr_cmd_t cmd)
4960 {
4961         int error = 0;
4962         int i;
4963         char *attrs = fattrp->attrlist4;
4964         uint32_t attrslen = fattrp->attrlist4_len;
4965         XDR xdr;
4966         nfsstat4 status = NFS4_OK;
4967         vnode_t *vp = cs->vp;
4968         union nfs4_attr_u *na;
4969         uint8_t *amap;
4970 
4971 #ifndef lint
4972         /*
4973          * Make sure that maximum attribute number can be expressed as an
4974          * 8 bit quantity.
4975          */
4976         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4977 #endif
4978 
4979         if (vp == NULL) {
4980                 if (resp)
4981                         *resp = 0;
4982                 return (NFS4ERR_NOFILEHANDLE);
4983         }
4984         if (cs->access == CS_ACCESS_DENIED) {
4985                 if (resp)
4986                         *resp = 0;
4987                 return (NFS4ERR_ACCESS);
4988         }
4989 
4990         sargp->op = cmd;
4991         sargp->cs = cs;
4992         sargp->flag = 0;     /* may be set later */
4993         sargp->vap->va_mask = 0;
4994         sargp->rdattr_error = NFS4_OK;
4995         sargp->rdattr_error_req = FALSE;
4996         /* sargp->sbp is set by the caller */
4997 
4998         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4999 
5000         na = ntovp->na;
5001         amap = ntovp->amap;
5002 
5003         /*
5004          * The following loop iterates on the nfs4_ntov_map checking
5005          * if the fbit is set in the requested bitmap.
5006          * If set then we process the arguments using the
5007          * rfs4_fattr4 conversion functions to populate the setattr
5008          * vattr and va_mask. Any settable attrs that are not using vattr
5009          * will be set in this loop.
5010          */
5011         for (i = 0; i < nfs4_ntov_map_size; i++) {
5012                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5013                         continue;
5014                 }
5015                 /*
5016                  * If setattr, must be a writable attr.
5017                  * If verify/nverify, must be a readable attr.
5018                  */
5019                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5020                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5021                         /*
5022                          * Client tries to set/verify an
5023                          * unsupported attribute, tries to set
5024                          * a read only attr or verify a write
5025                          * only one - error!
5026                          */
5027                         break;
5028                 }
5029                 /*
5030                  * Decode the attribute to set/verify
5031                  */
5032                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5033                     &xdr, resp ? resp : NULL, na);
5034                 if (error)
5035                         break;
5036                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5037                 na++;
5038                 (ntovp->attrcnt)++;
5039                 if (nfs4_ntov_map[i].vfsstat)
5040                         ntovp->vfsstat = TRUE;
5041         }
5042 
5043         if (error != 0)
5044                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5045                     puterrno4(error));
5046         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5047         return (status);
5048 }
5049 
5050 static nfsstat4
5051 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5052     stateid4 *stateid)
5053 {
5054         int error = 0;
5055         struct nfs4_svgetit_arg sarg;
5056         bool_t trunc;
5057 
5058         nfsstat4 status = NFS4_OK;
5059         cred_t *cr = cs->cr;
5060         vnode_t *vp = cs->vp;
5061         struct nfs4_ntov_table ntov;
5062         struct statvfs64 sb;
5063         struct vattr bva;
5064         struct flock64 bf;
5065         int in_crit = 0;
5066         uint_t saved_mask = 0;
5067         caller_context_t ct;
5068 
5069         *resp = 0;
5070         sarg.sbp = &sb;
5071         sarg.is_referral = B_FALSE;
5072         nfs4_ntov_table_init(&ntov);
5073         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5074             NFS4ATTR_SETIT);
5075         if (status != NFS4_OK) {
5076                 /*
5077                  * failed set attrs
5078                  */
5079                 goto done;
5080         }
5081         if ((sarg.vap->va_mask == 0) &&
5082             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5083                 /*
5084                  * no further work to be done
5085                  */
5086                 goto done;
5087         }
5088 
5089         /*
5090          * If we got a request to set the ACL and the MODE, only
5091          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5092          * to change any other bits, along with setting an ACL,
5093          * gives NFS4ERR_INVAL.
5094          */
5095         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5096             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5097                 vattr_t va;
5098 
5099                 va.va_mask = AT_MODE;
5100                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5101                 if (error) {
5102                         status = puterrno4(error);
5103                         goto done;
5104                 }
5105                 if ((sarg.vap->va_mode ^ va.va_mode) &
5106                     ~(VSUID | VSGID | VSVTX)) {
5107                         status = NFS4ERR_INVAL;
5108                         goto done;
5109                 }
5110         }
5111 
5112         /* Check stateid only if size has been set */
5113         if (sarg.vap->va_mask & AT_SIZE) {
5114                 trunc = (sarg.vap->va_size == 0);
5115                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5116                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5117                 if (status != NFS4_OK)
5118                         goto done;
5119         } else {
5120                 ct.cc_sysid = 0;
5121                 ct.cc_pid = 0;
5122                 ct.cc_caller_id = nfs4_srv_caller_id;
5123                 ct.cc_flags = CC_DONTBLOCK;
5124         }
5125 
5126         /* XXX start of possible race with delegations */
5127 
5128         /*
5129          * We need to specially handle size changes because it is
5130          * possible for the client to create a file with read-only
5131          * modes, but with the file opened for writing. If the client
5132          * then tries to set the file size, e.g. ftruncate(3C),
5133          * fcntl(F_FREESP), the normal access checking done in
5134          * VOP_SETATTR would prevent the client from doing it even though
5135          * it should be allowed to do so.  To get around this, we do the
5136          * access checking for ourselves and use VOP_SPACE which doesn't
5137          * do the access checking.
5138          * Also the client should not be allowed to change the file
5139          * size if there is a conflicting non-blocking mandatory lock in
5140          * the region of the change.
5141          */
5142         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5143                 u_offset_t offset;
5144                 ssize_t length;
5145 
5146                 /*
5147                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5148                  * before returning, sarg.vap->va_mask is used to
5149                  * generate the setattr reply bitmap.  We also clear
5150                  * AT_SIZE below before calling VOP_SPACE.  For both
5151                  * of these cases, the va_mask needs to be saved here
5152                  * and restored after calling VOP_SETATTR.
5153                  */
5154                 saved_mask = sarg.vap->va_mask;
5155 
5156                 /*
5157                  * Check any possible conflict due to NBMAND locks.
5158                  * Get into critical region before VOP_GETATTR, so the
5159                  * size attribute is valid when checking conflicts.
5160                  */
5161                 if (nbl_need_check(vp)) {
5162                         nbl_start_crit(vp, RW_READER);
5163                         in_crit = 1;
5164                 }
5165 
5166                 bva.va_mask = AT_UID|AT_SIZE;
5167                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5168                         status = puterrno4(error);
5169                         goto done;
5170                 }
5171 
5172                 if (in_crit) {
5173                         if (sarg.vap->va_size < bva.va_size) {
5174                                 offset = sarg.vap->va_size;
5175                                 length = bva.va_size - sarg.vap->va_size;
5176                         } else {
5177                                 offset = bva.va_size;
5178                                 length = sarg.vap->va_size - bva.va_size;
5179                         }
5180                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5181                             &ct)) {
5182                                 status = NFS4ERR_LOCKED;
5183                                 goto done;
5184                         }
5185                 }
5186 
5187                 if (crgetuid(cr) == bva.va_uid) {
5188                         sarg.vap->va_mask &= ~AT_SIZE;
5189                         bf.l_type = F_WRLCK;
5190                         bf.l_whence = 0;
5191                         bf.l_start = (off64_t)sarg.vap->va_size;
5192                         bf.l_len = 0;
5193                         bf.l_sysid = 0;
5194                         bf.l_pid = 0;
5195                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5196                             (offset_t)sarg.vap->va_size, cr, &ct);
5197                 }
5198         }
5199 
5200         if (!error && sarg.vap->va_mask != 0)
5201                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5202 
5203         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5204         if (saved_mask & AT_SIZE)
5205                 sarg.vap->va_mask |= AT_SIZE;
5206 
5207         /*
5208          * If an ACL was being set, it has been delayed until now,
5209          * in order to set the mode (via the VOP_SETATTR() above) first.
5210          */
5211         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5212                 int i;
5213 
5214                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5215                         if (ntov.amap[i] == FATTR4_ACL)
5216                                 break;
5217                 if (i < NFS4_MAXNUM_ATTRS) {
5218                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5219                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5220                         if (error == 0) {
5221                                 *resp |= FATTR4_ACL_MASK;
5222                         } else if (error == ENOTSUP) {
5223                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5224                                 status = NFS4ERR_ATTRNOTSUPP;
5225                                 goto done;
5226                         }
5227                 } else {
5228                         NFS4_DEBUG(rfs4_debug,
5229                             (CE_NOTE, "do_rfs4_op_setattr: "
5230                             "unable to find ACL in fattr4"));
5231                         error = EINVAL;
5232                 }
5233         }
5234 
5235         if (error) {
5236                 /* check if a monitor detected a delegation conflict */
5237                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5238                         status = NFS4ERR_DELAY;
5239                 else
5240                         status = puterrno4(error);
5241 
5242                 /*
5243                  * Set the response bitmap when setattr failed.
5244                  * If VOP_SETATTR partially succeeded, test by doing a
5245                  * VOP_GETATTR on the object and comparing the data
5246                  * to the setattr arguments.
5247                  */
5248                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5249         } else {
5250                 /*
5251                  * Force modified metadata out to stable storage.
5252                  */
5253                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5254                 /*
5255                  * Set response bitmap
5256                  */
5257                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5258         }
5259 
5260 /* Return early and already have a NFSv4 error */
5261 done:
5262         /*
5263          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5264          * conversion sets both readable and writeable NFS4 attrs
5265          * for AT_MTIME and AT_ATIME.  The line below masks out
5266          * unrequested attrs from the setattr result bitmap.  This
5267          * is placed after the done: label to catch the ATTRNOTSUP
5268          * case.
5269          */
5270         *resp &= fattrp->attrmask;
5271 
5272         if (in_crit)
5273                 nbl_end_crit(vp);
5274 
5275         nfs4_ntov_table_free(&ntov, &sarg);
5276 
5277         return (status);
5278 }
5279 
5280 /* ARGSUSED */
5281 static void
5282 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5283     struct compound_state *cs)
5284 {
5285         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5286         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5287         bslabel_t *clabel;
5288 
5289         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5290             SETATTR4args *, args);
5291 
5292         if (cs->vp == NULL) {
5293                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5294                 goto out;
5295         }
5296 
5297         /*
5298          * If there is an unshared filesystem mounted on this vnode,
5299          * do not allow to setattr on this vnode.
5300          */
5301         if (vn_ismntpt(cs->vp)) {
5302                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5303                 goto out;
5304         }
5305 
5306         resp->attrsset = 0;
5307 
5308         if (rdonly4(req, cs)) {
5309                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5310                 goto out;
5311         }
5312 
5313         /* check label before setting attributes */
5314         if (is_system_labeled()) {
5315                 ASSERT(req->rq_label != NULL);
5316                 clabel = req->rq_label;
5317                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5318                     "got client label from request(1)",
5319                     struct svc_req *, req);
5320                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5321                         if (!do_rfs_label_check(clabel, cs->vp,
5322                             EQUALITY_CHECK, cs->exi)) {
5323                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5324                                 goto out;
5325                         }
5326                 }
5327         }
5328 
5329         *cs->statusp = resp->status =
5330             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5331             &args->stateid);
5332 
5333 out:
5334         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5335             SETATTR4res *, resp);
5336 }
5337 
5338 /* ARGSUSED */
5339 static void
5340 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5341     struct compound_state *cs)
5342 {
5343         /*
5344          * verify and nverify are exactly the same, except that nverify
5345          * succeeds when some argument changed, and verify succeeds when
5346          * when none changed.
5347          */
5348 
5349         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5350         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5351 
5352         int error;
5353         struct nfs4_svgetit_arg sarg;
5354         struct statvfs64 sb;
5355         struct nfs4_ntov_table ntov;
5356 
5357         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5358             VERIFY4args *, args);
5359 
5360         if (cs->vp == NULL) {
5361                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5362                 goto out;
5363         }
5364 
5365         sarg.sbp = &sb;
5366         sarg.is_referral = B_FALSE;
5367         nfs4_ntov_table_init(&ntov);
5368         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5369             &sarg, &ntov, NFS4ATTR_VERIT);
5370         if (resp->status != NFS4_OK) {
5371                 /*
5372                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5373                  * so could return -1 for "no match".
5374                  */
5375                 if (resp->status == -1)
5376                         resp->status = NFS4ERR_NOT_SAME;
5377                 goto done;
5378         }
5379         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5380         switch (error) {
5381         case 0:
5382                 resp->status = NFS4_OK;
5383                 break;
5384         case -1:
5385                 resp->status = NFS4ERR_NOT_SAME;
5386                 break;
5387         default:
5388                 resp->status = puterrno4(error);
5389                 break;
5390         }
5391 done:
5392         *cs->statusp = resp->status;
5393         nfs4_ntov_table_free(&ntov, &sarg);
5394 out:
5395         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5396             VERIFY4res *, resp);
5397 }
5398 
5399 /* ARGSUSED */
5400 static void
5401 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5402     struct compound_state *cs)
5403 {
5404         /*
5405          * verify and nverify are exactly the same, except that nverify
5406          * succeeds when some argument changed, and verify succeeds when
5407          * when none changed.
5408          */
5409 
5410         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5411         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5412 
5413         int error;
5414         struct nfs4_svgetit_arg sarg;
5415         struct statvfs64 sb;
5416         struct nfs4_ntov_table ntov;
5417 
5418         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5419             NVERIFY4args *, args);
5420 
5421         if (cs->vp == NULL) {
5422                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5423                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5424                     NVERIFY4res *, resp);
5425                 return;
5426         }
5427         sarg.sbp = &sb;
5428         sarg.is_referral = B_FALSE;
5429         nfs4_ntov_table_init(&ntov);
5430         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5431             &sarg, &ntov, NFS4ATTR_VERIT);
5432         if (resp->status != NFS4_OK) {
5433                 /*
5434                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5435                  * so could return -1 for "no match".
5436                  */
5437                 if (resp->status == -1)
5438                         resp->status = NFS4_OK;
5439                 goto done;
5440         }
5441         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5442         switch (error) {
5443         case 0:
5444                 resp->status = NFS4ERR_SAME;
5445                 break;
5446         case -1:
5447                 resp->status = NFS4_OK;
5448                 break;
5449         default:
5450                 resp->status = puterrno4(error);
5451                 break;
5452         }
5453 done:
5454         *cs->statusp = resp->status;
5455         nfs4_ntov_table_free(&ntov, &sarg);
5456 
5457         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5458             NVERIFY4res *, resp);
5459 }
5460 
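/*
 * Number of iovec entries rfs4_op_write() keeps on its stack; an mblk
 * chain needing more than this gets a kmem_alloc()ed array instead.
 * XXX - This should live in an NFS header file.
 */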
5464 #define MAX_IOVECS      12
5465 
5466 /* ARGSUSED */
5467 static void
5468 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5469     struct compound_state *cs)
5470 {
5471         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5472         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5473         int error;
5474         vnode_t *vp;
5475         struct vattr bva;
5476         u_offset_t rlimit;
5477         struct uio uio;
5478         struct iovec iov[MAX_IOVECS];
5479         struct iovec *iovp;
5480         int iovcnt;
5481         int ioflag;
5482         cred_t *savecred, *cr;
5483         bool_t *deleg = &cs->deleg;
5484         nfsstat4 stat;
5485         int in_crit = 0;
5486         caller_context_t ct;
5487 
5488         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5489             WRITE4args *, args);
5490 
5491         vp = cs->vp;
5492         if (vp == NULL) {
5493                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5494                 goto out;
5495         }
5496         if (cs->access == CS_ACCESS_DENIED) {
5497                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5498                 goto out;
5499         }
5500 
5501         cr = cs->cr;
5502 
5503         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5504             deleg, TRUE, &ct)) != NFS4_OK) {
5505                 *cs->statusp = resp->status = stat;
5506                 goto out;
5507         }
5508 
5509         /*
5510          * We have to enter the critical region before calling VOP_RWLOCK
5511          * to avoid a deadlock with ufs.
5512          */
5513         if (nbl_need_check(vp)) {
5514                 nbl_start_crit(vp, RW_READER);
5515                 in_crit = 1;
5516                 if (nbl_conflict(vp, NBL_WRITE,
5517                     args->offset, args->data_len, 0, &ct)) {
5518                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5519                         goto out;
5520                 }
5521         }
5522 
5523         bva.va_mask = AT_MODE | AT_UID;
5524         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5525 
5526         /*
5527          * If we can't get the attributes, then we can't do the
5528          * right access checking.  So, we'll fail the request.
5529          */
5530         if (error) {
5531                 *cs->statusp = resp->status = puterrno4(error);
5532                 goto out;
5533         }
5534 
5535         if (rdonly4(req, cs)) {
5536                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5537                 goto out;
5538         }
5539 
5540         if (vp->v_type != VREG) {
5541                 *cs->statusp = resp->status =
5542                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5543                 goto out;
5544         }
5545 
5546         if (crgetuid(cr) != bva.va_uid &&
5547             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5548                 *cs->statusp = resp->status = puterrno4(error);
5549                 goto out;
5550         }
5551 
5552         if (MANDLOCK(vp, bva.va_mode)) {
5553                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5554                 goto out;
5555         }
5556 
5557         if (args->data_len == 0) {
5558                 *cs->statusp = resp->status = NFS4_OK;
5559                 resp->count = 0;
5560                 resp->committed = args->stable;
5561                 resp->writeverf = Write4verf;
5562                 goto out;
5563         }
5564 
5565         if (args->mblk != NULL) {
5566                 mblk_t *m;
5567                 uint_t bytes, round_len;
5568 
5569                 iovcnt = 0;
5570                 bytes = 0;
5571                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5572                 for (m = args->mblk;
5573                     m != NULL && bytes < round_len;
5574                     m = m->b_cont) {
5575                         iovcnt++;
5576                         bytes += MBLKL(m);
5577                 }
5578 #ifdef DEBUG
5579                 /* should have ended on an mblk boundary */
5580                 if (bytes != round_len) {
5581                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5582                             bytes, round_len, args->data_len);
5583                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5584                             (void *)args->mblk, (void *)m);
5585                         ASSERT(bytes == round_len);
5586                 }
5587 #endif
5588                 if (iovcnt <= MAX_IOVECS) {
5589                         iovp = iov;
5590                 } else {
5591                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5592                 }
5593                 mblk_to_iov(args->mblk, iovcnt, iovp);
5594         } else if (args->rlist != NULL) {
5595                 iovcnt = 1;
5596                 iovp = iov;
5597                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5598                 iovp->iov_len = args->data_len;
5599         } else {
5600                 iovcnt = 1;
5601                 iovp = iov;
5602                 iovp->iov_base = args->data_val;
5603                 iovp->iov_len = args->data_len;
5604         }
5605 
5606         uio.uio_iov = iovp;
5607         uio.uio_iovcnt = iovcnt;
5608 
5609         uio.uio_segflg = UIO_SYSSPACE;
5610         uio.uio_extflg = UIO_COPY_DEFAULT;
5611         uio.uio_loffset = args->offset;
5612         uio.uio_resid = args->data_len;
5613         uio.uio_llimit = curproc->p_fsz_ctl;
5614         rlimit = uio.uio_llimit - args->offset;
5615         if (rlimit < (u_offset_t)uio.uio_resid)
5616                 uio.uio_resid = (int)rlimit;
5617 
5618         if (args->stable == UNSTABLE4)
5619                 ioflag = 0;
5620         else if (args->stable == FILE_SYNC4)
5621                 ioflag = FSYNC;
5622         else if (args->stable == DATA_SYNC4)
5623                 ioflag = FDSYNC;
5624         else {
5625                 if (iovp != iov)
5626                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5627                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5628                 goto out;
5629         }
5630 
5631         /*
5632          * We're changing creds because VM may fault and we need
5633          * the cred of the current thread to be used if quota
5634          * checking is enabled.
5635          */
5636         savecred = curthread->t_cred;
5637         curthread->t_cred = cr;
5638         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5639         curthread->t_cred = savecred;
5640 
5641         if (iovp != iov)
5642                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5643 
5644         if (error) {
5645                 *cs->statusp = resp->status = puterrno4(error);
5646                 goto out;
5647         }
5648 
5649         *cs->statusp = resp->status = NFS4_OK;
5650         resp->count = args->data_len - uio.uio_resid;
5651 
5652         if (ioflag == 0)
5653                 resp->committed = UNSTABLE4;
5654         else
5655                 resp->committed = FILE_SYNC4;
5656 
5657         resp->writeverf = Write4verf;
5658 
5659 out:
5660         if (in_crit)
5661                 nbl_end_crit(vp);
5662 
5663         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5664             WRITE4res *, resp);
5665 }
5666 
5667 
5668 /* XXX put in a header file */
5669 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5670 
5671 void
5672 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5673     struct svc_req *req, cred_t *cr, int *rv)
5674 {
5675         uint_t i;
5676         struct compound_state cs;
5677 
5678         if (rv != NULL)
5679                 *rv = 0;
5680         rfs4_init_compound_state(&cs);
        /*
         * Form a reply tag by copying over the request tag.
         */
5684         resp->tag.utf8string_val =
5685             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5686         resp->tag.utf8string_len = args->tag.utf8string_len;
5687         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5688             resp->tag.utf8string_len);
5689 
5690         cs.statusp = &resp->status;
5691         cs.req = req;
5692         resp->array = NULL;
5693         resp->array_len = 0;
5694 
5695         /*
5696          * XXX for now, minorversion should be zero
5697          */
5698         if (args->minorversion != NFS4_MINORVERSION) {
5699                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5700                     &cs, COMPOUND4args *, args);
5701                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5702                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5703                     &cs, COMPOUND4res *, resp);
5704                 return;
5705         }
5706 
5707         if (args->array_len == 0) {
5708                 resp->status = NFS4_OK;
5709                 return;
5710         }
5711 
5712         ASSERT(exi == NULL);
5713         ASSERT(cr == NULL);
5714 
5715         cr = crget();
5716         ASSERT(cr != NULL);
5717 
5718         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5719                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5720                     &cs, COMPOUND4args *, args);
5721                 crfree(cr);
5722                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5723                     &cs, COMPOUND4res *, resp);
5724                 svcerr_badcred(req->rq_xprt);
5725                 if (rv != NULL)
5726                         *rv = 1;
5727                 return;
5728         }
5729         resp->array_len = args->array_len;
5730         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5731             KM_SLEEP);
5732 
5733         cs.basecr = cr;
5734 
5735         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5736             COMPOUND4args *, args);
5737 
5738         /*
5739          * For now, NFS4 compound processing must be protected by
5740          * exported_lock because it can access more than one exportinfo
5741          * per compound and share/unshare can now change multiple
5742          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5743          * per proc (excluding public exinfo), and exi_count design
5744          * is sufficient to protect concurrent execution of NFS2/3
5745          * ops along with unexport.  This lock will be removed as
5746          * part of the NFSv4 phase 2 namespace redesign work.
5747          */
5748         rw_enter(&exported_lock, RW_READER);
5749 
5750         /*
5751          * If this is the first compound we've seen, we need to start all
5752          * new instances' grace periods.
5753          */
5754         if (rfs4_seen_first_compound == 0) {
5755                 rfs4_grace_start_new();
5756                 /*
5757                  * This must be set after rfs4_grace_start_new(), otherwise
5758                  * another thread could proceed past here before the former
5759                  * is finished.
5760                  */
5761                 rfs4_seen_first_compound = 1;
5762         }
5763 
5764         for (i = 0; i < args->array_len && cs.cont; i++) {
5765                 nfs_argop4 *argop;
5766                 nfs_resop4 *resop;
5767                 uint_t op;
5768 
5769                 argop = &args->array[i];
5770                 resop = &resp->array[i];
5771                 resop->resop = argop->argop;
5772                 op = (uint_t)resop->resop;
5773 
5774                 if (op < rfsv4disp_cnt) {
5775                         /*
5776                          * Count the individual ops here; NULL and COMPOUND
5777                          * are counted in common_dispatch()
5778                          */
5779                         rfsproccnt_v4_ptr[op].value.ui64++;
5780 
5781                         NFS4_DEBUG(rfs4_debug > 1,
5782                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5783                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5784                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5785                             rfs4_op_string[op], *cs.statusp));
5786                         if (*cs.statusp != NFS4_OK)
5787                                 cs.cont = FALSE;
5788                 } else {
5789                         /*
5790                          * This is effectively dead code since XDR code
5791                          * will have already returned BADXDR if op doesn't
5792                          * decode to legal value.  This only done for a
5793                          * day when XDR code doesn't verify v4 opcodes.
5794                          */
5795                         op = OP_ILLEGAL;
5796                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5797 
5798                         rfs4_op_illegal(argop, resop, req, &cs);
5799                         cs.cont = FALSE;
5800                 }
5801 
5802                 /*
5803                  * If not at last op, and if we are to stop, then
5804                  * compact the results array.
5805                  */
5806                 if ((i + 1) < args->array_len && !cs.cont) {
5807                         nfs_resop4 *new_res = kmem_alloc(
5808                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5809                         bcopy(resp->array,
5810                             new_res, (i+1) * sizeof (nfs_resop4));
5811                         kmem_free(resp->array,
5812                             args->array_len * sizeof (nfs_resop4));
5813 
5814                         resp->array_len =  i + 1;
5815                         resp->array = new_res;
5816                 }
5817         }
5818 
5819         rw_exit(&exported_lock);
5820 
5821         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5822             COMPOUND4res *, resp);
5823 
5824         if (cs.vp)
5825                 VN_RELE(cs.vp);
5826         if (cs.saved_vp)
5827                 VN_RELE(cs.saved_vp);
5828         if (cs.saved_fh.nfs_fh4_val)
5829                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5830 
5831         if (cs.basecr)
5832                 crfree(cs.basecr);
5833         if (cs.cr)
5834                 crfree(cs.cr);
5835         /*
5836          * done with this compound request, free the label
5837          */
5838 
5839         if (req->rq_label != NULL) {
5840                 kmem_free(req->rq_label, sizeof (bslabel_t));
5841                 req->rq_label = NULL;
5842         }
5843 }
5844 
5845 /*
5846  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5847  * XXX zero out the tag and array values. Need to investigate why the
5848  * XXX calls occur, but at least prevent the panic for now.
5849  */
5850 void
5851 rfs4_compound_free(COMPOUND4res *resp)
5852 {
5853         uint_t i;
5854 
5855         if (resp->tag.utf8string_val) {
5856                 UTF8STRING_FREE(resp->tag)
5857         }
5858 
5859         for (i = 0; i < resp->array_len; i++) {
5860                 nfs_resop4 *resop;
5861                 uint_t op;
5862 
5863                 resop = &resp->array[i];
5864                 op = (uint_t)resop->resop;
5865                 if (op < rfsv4disp_cnt) {
5866                         (*rfsv4disptab[op].dis_resfree)(resop);
5867                 }
5868         }
5869         if (resp->array != NULL) {
5870                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5871         }
5872 }
5873 
5874 /*
5875  * Process the value of the compound request rpc flags, as a bit-AND
5876  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5877  */
5878 void
5879 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5880 {
5881         int i;
5882         int flag = RPC_ALL;
5883 
5884         for (i = 0; flag && i < args->array_len; i++) {
5885                 uint_t op;
5886 
5887                 op = (uint_t)args->array[i].argop;
5888 
5889                 if (op < rfsv4disp_cnt)
5890                         flag &= rfsv4disptab[op].dis_flags;
5891                 else
5892                         flag = 0;
5893         }
5894         *flagp = flag;
5895 }
5896 
5897 nfsstat4
5898 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5899 {
5900         nfsstat4 e;
5901 
5902         rfs4_dbe_lock(cp->rc_dbe);
5903 
5904         if (cp->rc_sysidt != LM_NOSYSID) {
5905                 *sp = cp->rc_sysidt;
5906                 e = NFS4_OK;
5907 
5908         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5909                 *sp = cp->rc_sysidt;
5910                 e = NFS4_OK;
5911 
5912                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5913                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
5914         } else
5915                 e = NFS4ERR_DELAY;
5916 
5917         rfs4_dbe_unlock(cp->rc_dbe);
5918         return (e);
5919 }
5920 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: emit one CE_NOTE line describing a lock request.
 *
 *      str             caller supplied prefix for the message
 *      operation       fcntl style command (F_GETLK, F_SETLK, ...)
 *      flk             the lock being described; l_whence must be 0
 *
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *opname = "F_UNKNOWN";
        char *lktype = "F_UNKNOWN";
        longlong_t span;

        if (operation == F_GETLK)
                opname = "F_GETLK";
        else if (operation == F_SETLK)
                opname = "F_SETLK";
        else if (operation == F_SETLK_NBMAND)
                opname = "F_SETLK_NBMAND";

        if (flk->l_type == F_UNLCK)
                lktype = "F_UNLCK";
        else if (flk->l_type == F_RDLCK)
                lktype = "F_RDLCK";
        else if (flk->l_type == F_WRLCK)
                lktype = "F_WRLCK";

        ASSERT(flk->l_whence == 0);

        span = (flk->l_len != 0) ? (longlong_t)flk->l_len : ~0LL;
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, opname, lktype, (longlong_t)flk->l_start, span, flk->l_pid);
}

/* Calls lock_print(s, t, f) when d is non-zero; expands to nothing otherwise */
#define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
5957 
5958 /*ARGSUSED*/
5959 static bool_t
5960 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5961 {
5962         return (TRUE);
5963 }
5964 
5965 /*
5966  * Look up the pathname using the vp in cs as the directory vnode.
5967  * cs->vp will be the vnode for the file on success
5968  */
5969 
5970 static nfsstat4
5971 rfs4_lookup(component4 *component, struct svc_req *req,
5972     struct compound_state *cs)
5973 {
5974         char *nm;
5975         uint32_t len;
5976         nfsstat4 status;
5977         struct sockaddr *ca;
5978         char *name;
5979 
5980         if (cs->vp == NULL) {
5981                 return (NFS4ERR_NOFILEHANDLE);
5982         }
5983         if (cs->vp->v_type != VDIR) {
5984                 return (NFS4ERR_NOTDIR);
5985         }
5986 
5987         status = utf8_dir_verify(component);
5988         if (status != NFS4_OK)
5989                 return (status);
5990 
5991         nm = utf8_to_fn(component, &len, NULL);
5992         if (nm == NULL) {
5993                 return (NFS4ERR_INVAL);
5994         }
5995 
5996         if (len > MAXNAMELEN) {
5997                 kmem_free(nm, len);
5998                 return (NFS4ERR_NAMETOOLONG);
5999         }
6000 
6001         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6002         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6003             MAXPATHLEN + 1);
6004 
6005         if (name == NULL) {
6006                 kmem_free(nm, len);
6007                 return (NFS4ERR_INVAL);
6008         }
6009 
6010         status = do_rfs4_op_lookup(name, req, cs);
6011 
6012         if (name != nm)
6013                 kmem_free(name, MAXPATHLEN + 1);
6014 
6015         kmem_free(nm, len);
6016 
6017         return (status);
6018 }
6019 
6020 static nfsstat4
6021 rfs4_lookupfile(component4 *component, struct svc_req *req,
6022     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6023 {
6024         nfsstat4 status;
6025         vnode_t *dvp = cs->vp;
6026         vattr_t bva, ava, fva;
6027         int error;
6028 
6029         /* Get "before" change value */
6030         bva.va_mask = AT_CTIME|AT_SEQ;
6031         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6032         if (error)
6033                 return (puterrno4(error));
6034 
6035         /* rfs4_lookup may VN_RELE directory */
6036         VN_HOLD(dvp);
6037 
6038         status = rfs4_lookup(component, req, cs);
6039         if (status != NFS4_OK) {
6040                 VN_RELE(dvp);
6041                 return (status);
6042         }
6043 
6044         /*
6045          * Get "after" change value, if it fails, simply return the
6046          * before value.
6047          */
6048         ava.va_mask = AT_CTIME|AT_SEQ;
6049         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6050                 ava.va_ctime = bva.va_ctime;
6051                 ava.va_seq = 0;
6052         }
6053         VN_RELE(dvp);
6054 
6055         /*
6056          * Validate the file is a file
6057          */
6058         fva.va_mask = AT_TYPE|AT_MODE;
6059         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6060         if (error)
6061                 return (puterrno4(error));
6062 
6063         if (fva.va_type != VREG) {
6064                 if (fva.va_type == VDIR)
6065                         return (NFS4ERR_ISDIR);
6066                 if (fva.va_type == VLNK)
6067                         return (NFS4ERR_SYMLINK);
6068                 return (NFS4ERR_INVAL);
6069         }
6070 
6071         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6072         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6073 
6074         /*
6075          * It is undefined if VOP_LOOKUP will change va_seq, so
6076          * cinfo.atomic = TRUE only if we have
6077          * non-zero va_seq's, and they have not changed.
6078          */
6079         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6080                 cinfo->atomic = TRUE;
6081         else
6082                 cinfo->atomic = FALSE;
6083 
6084         /* Check for mandatory locking */
6085         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6086         return (check_open_access(access, cs, req));
6087 }
6088 
6089 static nfsstat4
6090 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6091     cred_t *cr, vnode_t **vpp, bool_t *created)
6092 {
6093         int error;
6094         nfsstat4 status = NFS4_OK;
6095         vattr_t va;
6096 
6097 tryagain:
6098 
6099         /*
6100          * The file open mode used is VWRITE.  If the client needs
6101          * some other semantic, then it should do the access checking
6102          * itself.  It would have been nice to have the file open mode
6103          * passed as part of the arguments.
6104          */
6105 
6106         *created = TRUE;
6107         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6108 
6109         if (error) {
6110                 *created = FALSE;
6111 
6112                 /*
6113                  * If we got something other than file already exists
6114                  * then just return this error.  Otherwise, we got
6115                  * EEXIST.  If we were doing a GUARDED create, then
6116                  * just return this error.  Otherwise, we need to
6117                  * make sure that this wasn't a duplicate of an
6118                  * exclusive create request.
6119                  *
6120                  * The assumption is made that a non-exclusive create
6121                  * request will never return EEXIST.
6122                  */
6123 
6124                 if (error != EEXIST || mode == GUARDED4) {
6125                         status = puterrno4(error);
6126                         return (status);
6127                 }
6128                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6129                     NULL, NULL, NULL);
6130 
6131                 if (error) {
6132                         /*
6133                          * We couldn't find the file that we thought that
6134                          * we just created.  So, we'll just try creating
6135                          * it again.
6136                          */
6137                         if (error == ENOENT)
6138                                 goto tryagain;
6139 
6140                         status = puterrno4(error);
6141                         return (status);
6142                 }
6143 
6144                 if (mode == UNCHECKED4) {
6145                         /* existing object must be regular file */
6146                         if ((*vpp)->v_type != VREG) {
6147                                 if ((*vpp)->v_type == VDIR)
6148                                         status = NFS4ERR_ISDIR;
6149                                 else if ((*vpp)->v_type == VLNK)
6150                                         status = NFS4ERR_SYMLINK;
6151                                 else
6152                                         status = NFS4ERR_INVAL;
6153                                 VN_RELE(*vpp);
6154                                 return (status);
6155                         }
6156 
6157                         return (NFS4_OK);
6158                 }
6159 
6160                 /* Check for duplicate request */
6161                 va.va_mask = AT_MTIME;
6162                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6163                 if (!error) {
6164                         /* We found the file */
6165                         const timestruc_t *mtime = &vap->va_mtime;
6166 
6167                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6168                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6169                                 /* but its not our creation */
6170                                 VN_RELE(*vpp);
6171                                 return (NFS4ERR_EXIST);
6172                         }
6173                         *created = TRUE; /* retrans of create == created */
6174                         return (NFS4_OK);
6175                 }
6176                 VN_RELE(*vpp);
6177                 return (NFS4ERR_EXIST);
6178         }
6179 
6180         return (NFS4_OK);
6181 }
6182 
6183 static nfsstat4
6184 check_open_access(uint32_t access, struct compound_state *cs,
6185     struct svc_req *req)
6186 {
6187         int error;
6188         vnode_t *vp;
6189         bool_t readonly;
6190         cred_t *cr = cs->cr;
6191 
6192         /* For now we don't allow mandatory locking as per V2/V3 */
6193         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6194                 return (NFS4ERR_ACCESS);
6195         }
6196 
6197         vp = cs->vp;
6198         ASSERT(cr != NULL && vp->v_type == VREG);
6199 
6200         /*
6201          * If the file system is exported read only and we are trying
6202          * to open for write, then return NFS4ERR_ROFS
6203          */
6204 
6205         readonly = rdonly4(req, cs);
6206 
6207         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6208                 return (NFS4ERR_ROFS);
6209 
6210         if (access & OPEN4_SHARE_ACCESS_READ) {
6211                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6212                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6213                         return (NFS4ERR_ACCESS);
6214                 }
6215         }
6216 
6217         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6218                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6219                 if (error)
6220                         return (NFS4ERR_ACCESS);
6221         }
6222 
6223         return (NFS4_OK);
6224 }
6225 
6226 static nfsstat4
6227 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6228     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6229 {
6230         struct nfs4_svgetit_arg sarg;
6231         struct nfs4_ntov_table ntov;
6232 
6233         bool_t ntov_table_init = FALSE;
6234         struct statvfs64 sb;
6235         nfsstat4 status;
6236         vnode_t *vp;
6237         vattr_t bva, ava, iva, cva, *vap;
6238         vnode_t *dvp;
6239         timespec32_t *mtime;
6240         char *nm = NULL;
6241         uint_t buflen;
6242         bool_t created;
6243         bool_t setsize = FALSE;
6244         len_t reqsize;
6245         int error;
6246         bool_t trunc;
6247         caller_context_t ct;
6248         component4 *component;
6249         bslabel_t *clabel;
6250         struct sockaddr *ca;
6251         char *name = NULL;
6252 
6253         sarg.sbp = &sb;
6254         sarg.is_referral = B_FALSE;
6255 
6256         dvp = cs->vp;
6257 
6258         /* Check if the file system is read only */
6259         if (rdonly4(req, cs))
6260                 return (NFS4ERR_ROFS);
6261 
6262         /* check the label of including directory */
6263         if (is_system_labeled()) {
6264                 ASSERT(req->rq_label != NULL);
6265                 clabel = req->rq_label;
6266                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6267                     "got client label from request(1)",
6268                     struct svc_req *, req);
6269                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6270                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6271                             cs->exi)) {
6272                                 return (NFS4ERR_ACCESS);
6273                         }
6274                 }
6275         }
6276 
6277         /*
6278          * Get the last component of path name in nm. cs will reference
6279          * the including directory on success.
6280          */
6281         component = &args->open_claim4_u.file;
6282         status = utf8_dir_verify(component);
6283         if (status != NFS4_OK)
6284                 return (status);
6285 
6286         nm = utf8_to_fn(component, &buflen, NULL);
6287 
6288         if (nm == NULL)
6289                 return (NFS4ERR_RESOURCE);
6290 
6291         if (buflen > MAXNAMELEN) {
6292                 kmem_free(nm, buflen);
6293                 return (NFS4ERR_NAMETOOLONG);
6294         }
6295 
6296         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6297         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6298         if (error) {
6299                 kmem_free(nm, buflen);
6300                 return (puterrno4(error));
6301         }
6302 
6303         if (bva.va_type != VDIR) {
6304                 kmem_free(nm, buflen);
6305                 return (NFS4ERR_NOTDIR);
6306         }
6307 
6308         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6309 
6310         switch (args->mode) {
6311         case GUARDED4:
6312                 /*FALLTHROUGH*/
6313         case UNCHECKED4:
6314                 nfs4_ntov_table_init(&ntov);
6315                 ntov_table_init = TRUE;
6316 
6317                 *attrset = 0;
6318                 status = do_rfs4_set_attrs(attrset,
6319                     &args->createhow4_u.createattrs,
6320                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6321 
6322                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6323                     sarg.vap->va_type != VREG) {
6324                         if (sarg.vap->va_type == VDIR)
6325                                 status = NFS4ERR_ISDIR;
6326                         else if (sarg.vap->va_type == VLNK)
6327                                 status = NFS4ERR_SYMLINK;
6328                         else
6329                                 status = NFS4ERR_INVAL;
6330                 }
6331 
6332                 if (status != NFS4_OK) {
6333                         kmem_free(nm, buflen);
6334                         nfs4_ntov_table_free(&ntov, &sarg);
6335                         *attrset = 0;
6336                         return (status);
6337                 }
6338 
6339                 vap = sarg.vap;
6340                 vap->va_type = VREG;
6341                 vap->va_mask |= AT_TYPE;
6342 
6343                 if ((vap->va_mask & AT_MODE) == 0) {
6344                         vap->va_mask |= AT_MODE;
6345                         vap->va_mode = (mode_t)0600;
6346                 }
6347 
6348                 if (vap->va_mask & AT_SIZE) {
6349 
6350                         /* Disallow create with a non-zero size */
6351 
6352                         if ((reqsize = sarg.vap->va_size) != 0) {
6353                                 kmem_free(nm, buflen);
6354                                 nfs4_ntov_table_free(&ntov, &sarg);
6355                                 *attrset = 0;
6356                                 return (NFS4ERR_INVAL);
6357                         }
6358                         setsize = TRUE;
6359                 }
6360                 break;
6361 
6362         case EXCLUSIVE4:
6363                 /* prohibit EXCL create of named attributes */
6364                 if (dvp->v_flag & V_XATTRDIR) {
6365                         kmem_free(nm, buflen);
6366                         *attrset = 0;
6367                         return (NFS4ERR_INVAL);
6368                 }
6369 
6370                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6371                 cva.va_type = VREG;
6372                 /*
6373                  * Ensure no time overflows. Assumes underlying
6374                  * filesystem supports at least 32 bits.
6375                  * Truncate nsec to usec resolution to allow valid
6376                  * compares even if the underlying filesystem truncates.
6377                  */
6378                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6379                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6380                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6381                 cva.va_mode = (mode_t)0;
6382                 vap = &cva;
6383 
6384                 /*
6385                  * For EXCL create, attrset is set to the server attr
6386                  * used to cache the client's verifier.
6387                  */
6388                 *attrset = FATTR4_TIME_MODIFY_MASK;
6389                 break;
6390         }
6391 
6392         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6393         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6394             MAXPATHLEN  + 1);
6395 
6396         if (name == NULL) {
6397                 kmem_free(nm, buflen);
6398                 return (NFS4ERR_SERVERFAULT);
6399         }
6400 
6401         status = create_vnode(dvp, name, vap, args->mode,
6402             cs->cr, &vp, &created);
6403         if (nm != name)
6404                 kmem_free(name, MAXPATHLEN + 1);
6405         kmem_free(nm, buflen);
6406 
6407         if (status != NFS4_OK) {
6408                 if (ntov_table_init)
6409                         nfs4_ntov_table_free(&ntov, &sarg);
6410                 *attrset = 0;
6411                 return (status);
6412         }
6413 
6414         trunc = (setsize && !created);
6415 
6416         if (args->mode != EXCLUSIVE4) {
6417                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6418 
6419                 /*
6420                  * True verification that object was created with correct
6421                  * attrs is impossible.  The attrs could have been changed
6422                  * immediately after object creation.  If attributes did
6423                  * not verify, the only recourse for the server is to
6424                  * destroy the object.  Maybe if some attrs (like gid)
6425                  * are set incorrectly, the object should be destroyed;
6426                  * however, seems bad as a default policy.  Do we really
6427                  * want to destroy an object over one of the times not
6428                  * verifying correctly?  For these reasons, the server
6429                  * currently sets bits in attrset for createattrs
6430                  * that were set; however, no verification is done.
6431                  *
6432                  * vmask_to_nmask accounts for vattr bits set on create
6433                  *      [do_rfs4_set_attrs() only sets resp bits for
6434                  *       non-vattr/vfs bits.]
6435                  * Mask off any bits we set by default so as not to return
6436                  * more attrset bits than were requested in createattrs
6437                  */
6438                 if (created) {
6439                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6440                         *attrset &= createmask;
6441                 } else {
6442                         /*
6443                          * We did not create the vnode (we tried but it
6444                          * already existed).  In this case, the only createattr
6445                          * that the spec allows the server to set is size,
6446                          * and even then, it can only be set if it is 0.
6447                          */
6448                         *attrset = 0;
6449                         if (trunc)
6450                                 *attrset = FATTR4_SIZE_MASK;
6451                 }
6452         }
6453         if (ntov_table_init)
6454                 nfs4_ntov_table_free(&ntov, &sarg);
6455 
6456         /*
6457          * Get the initial "after" sequence number, if it fails,
6458          * set to zero, time to before.
6459          */
6460         iva.va_mask = AT_CTIME|AT_SEQ;
6461         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6462                 iva.va_seq = 0;
6463                 iva.va_ctime = bva.va_ctime;
6464         }
6465 
6466         /*
6467          * create_vnode attempts to create the file exclusive,
6468          * if it already exists the VOP_CREATE will fail and
6469          * may not increase va_seq. It is atomic if
6470          * we haven't changed the directory, but if it has changed
6471          * we don't know what changed it.
6472          */
6473         if (!created) {
6474                 if (bva.va_seq && iva.va_seq &&
6475                     bva.va_seq == iva.va_seq)
6476                         cinfo->atomic = TRUE;
6477                 else
6478                         cinfo->atomic = FALSE;
6479                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6480         } else {
6481                 /*
6482                  * The entry was created, we need to sync the
6483                  * directory metadata.
6484                  */
6485                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6486 
6487                 /*
6488                  * Get "after" change value, if it fails, simply return the
6489                  * before value.
6490                  */
6491                 ava.va_mask = AT_CTIME|AT_SEQ;
6492                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6493                         ava.va_ctime = bva.va_ctime;
6494                         ava.va_seq = 0;
6495                 }
6496 
6497                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6498 
6499                 /*
6500                  * The cinfo->atomic = TRUE only if we have
6501                  * non-zero va_seq's, and it has incremented by exactly one
6502                  * during the create_vnode and it didn't
6503                  * change during the VOP_FSYNC.
6504                  */
6505                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6506                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6507                         cinfo->atomic = TRUE;
6508                 else
6509                         cinfo->atomic = FALSE;
6510         }
6511 
6512         /* Check for mandatory locking and that the size gets set. */
6513         cva.va_mask = AT_MODE;
6514         if (setsize)
6515                 cva.va_mask |= AT_SIZE;
6516 
6517         /* Assume the worst */
6518         cs->mandlock = TRUE;
6519 
6520         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6521                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6522 
6523                 /*
6524                  * Truncate the file if necessary; this would be
6525                  * the case for create over an existing file.
6526                  */
6527 
6528                 if (trunc) {
6529                         int in_crit = 0;
6530                         rfs4_file_t *fp;
6531                         bool_t create = FALSE;
6532 
6533                         /*
6534                          * We are writing over an existing file.
6535                          * Check to see if we need to recall a delegation.
6536                          */
6537                         rfs4_hold_deleg_policy();
6538                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6539                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6540                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6541                                         rfs4_file_rele(fp);
6542                                         rfs4_rele_deleg_policy();
6543                                         VN_RELE(vp);
6544                                         *attrset = 0;
6545                                         return (NFS4ERR_DELAY);
6546                                 }
6547                                 rfs4_file_rele(fp);
6548                         }
6549                         rfs4_rele_deleg_policy();
6550 
6551                         if (nbl_need_check(vp)) {
6552                                 in_crit = 1;
6553 
6554                                 ASSERT(reqsize == 0);
6555 
6556                                 nbl_start_crit(vp, RW_READER);
6557                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6558                                     cva.va_size, 0, NULL)) {
6559                                         in_crit = 0;
6560                                         nbl_end_crit(vp);
6561                                         VN_RELE(vp);
6562                                         *attrset = 0;
6563                                         return (NFS4ERR_ACCESS);
6564                                 }
6565                         }
6566                         ct.cc_sysid = 0;
6567                         ct.cc_pid = 0;
6568                         ct.cc_caller_id = nfs4_srv_caller_id;
6569                         ct.cc_flags = CC_DONTBLOCK;
6570 
6571                         cva.va_mask = AT_SIZE;
6572                         cva.va_size = reqsize;
6573                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6574                         if (in_crit)
6575                                 nbl_end_crit(vp);
6576                 }
6577         }
6578 
6579         error = makefh4(&cs->fh, vp, cs->exi);
6580 
6581         /*
6582          * Force modified data and metadata out to stable storage.
6583          */
6584         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6585 
6586         if (error) {
6587                 VN_RELE(vp);
6588                 *attrset = 0;
6589                 return (puterrno4(error));
6590         }
6591 
6592         /* if parent dir is attrdir, set namedattr fh flag */
6593         if (dvp->v_flag & V_XATTRDIR)
6594                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6595 
6596         if (cs->vp)
6597                 VN_RELE(cs->vp);
6598 
6599         cs->vp = vp;
6600 
6601         /*
6602          * if we did not create the file, we will need to check
6603          * the access bits on the file
6604          */
6605 
6606         if (!created) {
6607                 if (setsize)
6608                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6609                 status = check_open_access(args->share_access, cs, req);
6610                 if (status != NFS4_OK)
6611                         *attrset = 0;
6612         }
6613         return (status);
6614 }
6615 
/*
 * Common back end for the OPEN claim handlers.  Establishes (or upgrades)
 * the open state for open owner `oo' on the file currently referenced by
 * cs->vp / cs->fh and fills in `resp'.
 *
 *	access, deny	share access and share deny bits requested by this OPEN
 *	deleg		delegation request passed to rfs4_grant_delegation()
 *	deleg_cur	non-zero when called for CLAIM_DELEGATE_CUR; VOP_OPEN()
 *			is then skipped and only vn_open_upgrade() is done
 *
 * The result is reported through resp->status.  On NFS4_OK resp->stateid
 * holds the bumped open stateid and, if rfs4_grant_delegation() returned a
 * delegation state, resp->delegation is filled in from it.
 *
 * Every failure path after the state lookup releases the file and state
 * references obtained here and "closes" the state when screate is still
 * TRUE (presumably meaning the state was created by this call).
 */
/*ARGSUSED*/
static void
rfs4_do_open(struct compound_state *cs, struct svc_req *req,
    rfs4_openowner_t *oo, delegreq_t deleg,
    uint32_t access, uint32_t deny,
    OPEN4res *resp, int deleg_cur)
{
        /* XXX Currently not using req  */
        rfs4_state_t *sp;
        rfs4_file_t *fp;
        bool_t screate = TRUE;
        bool_t fcreate = TRUE;
        uint32_t open_a, share_a;
        uint32_t open_d, share_d;
        rfs4_deleg_state_t *dsp;
        sysid_t sysid;
        nfsstat4 status;
        caller_context_t ct;
        int fflags = 0;
        int recall = 0;
        int err;
        int first_open;

        /* get the file struct and hold a lock on it during initial open */
        fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
        if (fp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
                return;
        }

        sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
        if (sp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
                /* No need to keep any reference */
                rw_exit(&fp->rf_file_rwlock);
                rfs4_file_rele(fp);
                return;
        }

        /*
         * NOTE(review): from here on no path in this function drops
         * fp->rf_file_rwlock, unlike the sp == NULL case above.  Presumably
         * rfs4_findstate_by_owner_file() releases it on success -- confirm.
         */

        /* try to get the sysid before continuing */
        if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
                resp->status = status;
                rfs4_file_rele(fp);
                /* Not a fully formed open; "close" it */
                if (screate == TRUE)
                        rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                rfs4_state_rele(sp);
                return;
        }

        /* Calculate the fflags for this OPEN. */
        if (access & OPEN4_SHARE_ACCESS_READ)
                fflags |= FREAD;
        if (access & OPEN4_SHARE_ACCESS_WRITE)
                fflags |= FWRITE;

        /*
         * The state entry stays locked from here until the open has been
         * recorded or has failed; it is only dropped temporarily around a
         * delegation recall below.
         */
        rfs4_dbe_lock(sp->rs_dbe);

        /*
         * Calculate the new deny and access mode that this open is adding to
         * the file for this open owner;
         */
        open_d = (deny & ~sp->rs_open_deny);
        open_a = (access & ~sp->rs_open_access);

        /*
         * Calculate the new share access and share deny modes that this open
         * is adding to the file for this open owner;
         */
        share_a = (access & ~sp->rs_share_access);
        share_d = (deny & ~sp->rs_share_deny);

        /* No access bits recorded on this state yet means a first open */
        first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;

        /*
         * Check to see the client has already sent an open for this
         * open owner on this file with the same share/deny modes.
         * If so, we don't need to check for a conflict and we don't
         * need to add another shrlock.  If not, then we need to
         * check for conflicts in deny and access before checking for
         * conflicts in delegation.  We don't want to recall a
         * delegation based on an open that will eventually fail based
         * on share modes.
         */

        if (share_a || share_d) {
                if ((err = rfs4_share(sp, access, deny)) != 0) {
                        rfs4_dbe_unlock(sp->rs_dbe);
                        resp->status = err;

                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        return;
                }
        }

        rfs4_dbe_lock(fp->rf_dbe);

        /*
         * Check to see if this file is delegated and if so, if a
         * recall needs to be done.
         */
        if (rfs4_check_recall(sp, access)) {
                /*
                 * Drop both entry locks before issuing the recall and
                 * sleeping; they are retaken (state first, then file)
                 * once the delay is over.
                 */
                rfs4_dbe_unlock(fp->rf_dbe);
                rfs4_dbe_unlock(sp->rs_dbe);
                rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
                delay(NFS4_DELEGATION_CONFLICT_DELAY);
                rfs4_dbe_lock(sp->rs_dbe);

                /* if state closed while lock was dropped */
                if (sp->rs_closed) {
                        /* undo the share added above, if any was added */
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_OLD_STATEID;
                        return;
                }

                rfs4_dbe_lock(fp->rf_dbe);
                /* Let's see if the delegation was returned */
                if (rfs4_check_recall(sp, access)) {
                        /* still conflicting: back out and ask for a retry */
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        rfs4_update_lease(sp->rs_owner->ro_client);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_DELAY;
                        return;
                }
        }
        /*
         * the share check passed and any delegation conflict has been
         * taken care of, now call vop_open.
         * if this is the first open then call vop_open with fflags.
         * if not, call vn_open_upgrade with just the upgrade flags.
         *
         * if the file has been opened already, it will have the current
         * access mode in the state struct.  if it has no share access, then
         * this is a new open.
         *
         * However, if this is open with CLAIM_DELEGATE_CUR, then don't
         * call VOP_OPEN(), just do the open upgrade.
         */
        if (first_open && !deleg_cur) {
                ct.cc_sysid = sysid;
                ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
                ct.cc_caller_id = nfs4_srv_caller_id;
                ct.cc_flags = CC_DONTBLOCK;
                err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
                if (err) {
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        /* check if a monitor detected a delegation conflict */
                        if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
                                resp->status = NFS4ERR_DELAY;
                        else
                                resp->status = NFS4ERR_SERVERFAULT;
                        return;
                }
        } else { /* open upgrade */
                /*
                 * calculate the fflags for the new mode that is being added
                 * by this upgrade.
                 */
                fflags = 0;
                if (open_a & OPEN4_SHARE_ACCESS_READ)
                        fflags |= FREAD;
                if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                        fflags |= FWRITE;
                vn_open_upgrade(cs->vp, fflags);
        }

        /*
         * Record the requested modes in the state.  The per-file counters
         * are bumped only for the bits newly added by this open (open_d and
         * open_a), while the per-file masks take the full requested modes.
         */
        sp->rs_open_access |= access;
        sp->rs_open_deny |= deny;

        if (open_d & OPEN4_SHARE_DENY_READ)
                fp->rf_deny_read++;
        if (open_d & OPEN4_SHARE_DENY_WRITE)
                fp->rf_deny_write++;
        fp->rf_share_deny |= deny;

        if (open_a & OPEN4_SHARE_ACCESS_READ)
                fp->rf_access_read++;
        if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                fp->rf_access_write++;
        fp->rf_share_access |= access;

        /*
         * Check for delegation here. if the deleg argument is not
         * DELEG_ANY, then this is a reclaim from a client and
         * we must honor the delegation requested. If necessary we can
         * set the recall flag.
         */

        dsp = rfs4_grant_delegation(deleg, sp, &recall);

        /* Note in the compound state whether the file is write delegated */
        cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);

        next_stateid(&sp->rs_stateid);

        resp->stateid = sp->rs_stateid.stateid;

        rfs4_dbe_unlock(fp->rf_dbe);
        rfs4_dbe_unlock(sp->rs_dbe);

        if (dsp) {
                rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
                rfs4_deleg_state_rele(dsp);
        }

        rfs4_file_rele(fp);
        rfs4_state_rele(sp);

        resp->status = NFS4_OK;
}
6854 
6855 /*ARGSUSED*/
6856 static void
6857 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6858     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6859 {
6860         change_info4 *cinfo = &resp->cinfo;
6861         bitmap4 *attrset = &resp->attrset;
6862 
6863         if (args->opentype == OPEN4_NOCREATE)
6864                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6865                     req, cs, args->share_access, cinfo);
6866         else {
6867                 /* inhibit delegation grants during exclusive create */
6868 
6869                 if (args->mode == EXCLUSIVE4)
6870                         rfs4_disable_delegation();
6871 
6872                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6873                     oo->ro_client->rc_clientid);
6874         }
6875 
6876         if (resp->status == NFS4_OK) {
6877 
6878                 /* cs->vp cs->fh now reference the desired file */
6879 
6880                 rfs4_do_open(cs, req, oo,
6881                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6882                     args->share_access, args->share_deny, resp, 0);
6883 
6884                 /*
6885                  * If rfs4_createfile set attrset, we must
6886                  * clear this attrset before the response is copied.
6887                  */
6888                 if (resp->status != NFS4_OK && resp->attrset) {
6889                         resp->attrset = 0;
6890                 }
6891         }
6892         else
6893                 *cs->statusp = resp->status;
6894 
6895         if (args->mode == EXCLUSIVE4)
6896                 rfs4_enable_delegation();
6897 }
6898 
6899 /*ARGSUSED*/
6900 static void
6901 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6902     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6903 {
6904         change_info4 *cinfo = &resp->cinfo;
6905         vattr_t va;
6906         vtype_t v_type = cs->vp->v_type;
6907         int error = 0;
6908 
6909         /* Verify that we have a regular file */
6910         if (v_type != VREG) {
6911                 if (v_type == VDIR)
6912                         resp->status = NFS4ERR_ISDIR;
6913                 else if (v_type == VLNK)
6914                         resp->status = NFS4ERR_SYMLINK;
6915                 else
6916                         resp->status = NFS4ERR_INVAL;
6917                 return;
6918         }
6919 
6920         va.va_mask = AT_MODE|AT_UID;
6921         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
6922         if (error) {
6923                 resp->status = puterrno4(error);
6924                 return;
6925         }
6926 
6927         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6928 
6929         /*
6930          * Check if we have access to the file, Note the the file
6931          * could have originally been open UNCHECKED or GUARDED
6932          * with mode bits that will now fail, but there is nothing
6933          * we can really do about that except in the case that the
6934          * owner of the file is the one requesting the open.
6935          */
6936         if (crgetuid(cs->cr) != va.va_uid) {
6937                 resp->status = check_open_access(args->share_access, cs, req);
6938                 if (resp->status != NFS4_OK) {
6939                         return;
6940                 }
6941         }
6942 
6943         /*
6944          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6945          */
6946         cinfo->before = 0;
6947         cinfo->after = 0;
6948         cinfo->atomic = FALSE;
6949 
6950         rfs4_do_open(cs, req, oo,
6951             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6952             args->share_access, args->share_deny, resp, 0);
6953 }
6954 
6955 static void
6956 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6957     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6958 {
6959         int error;
6960         nfsstat4 status;
6961         stateid4 stateid =
6962             args->open_claim4_u.delegate_cur_info.delegate_stateid;
6963         rfs4_deleg_state_t *dsp;
6964 
6965         /*
6966          * Find the state info from the stateid and confirm that the
6967          * file is delegated.  If the state openowner is the same as
6968          * the supplied openowner we're done. If not, get the file
6969          * info from the found state info. Use that file info to
6970          * create the state for this lock owner. Note solaris doen't
6971          * really need the pathname to find the file. We may want to
6972          * lookup the pathname and make sure that the vp exist and
6973          * matches the vp in the file structure. However it is
6974          * possible that the pathname nolonger exists (local process
6975          * unlinks the file), so this may not be that useful.
6976          */
6977 
6978         status = rfs4_get_deleg_state(&stateid, &dsp);
6979         if (status != NFS4_OK) {
6980                 resp->status = status;
6981                 return;
6982         }
6983 
6984         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
6985 
6986         /*
6987          * New lock owner, create state. Since this was probably called
6988          * in response to a CB_RECALL we set deleg to DELEG_NONE
6989          */
6990 
6991         ASSERT(cs->vp != NULL);
6992         VN_RELE(cs->vp);
6993         VN_HOLD(dsp->rds_finfo->rf_vp);
6994         cs->vp = dsp->rds_finfo->rf_vp;
6995 
6996         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6997                 rfs4_deleg_state_rele(dsp);
6998                 *cs->statusp = resp->status = puterrno4(error);
6999                 return;
7000         }
7001 
7002         /* Mark progress for delegation returns */
7003         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7004         rfs4_deleg_state_rele(dsp);
7005         rfs4_do_open(cs, req, oo, DELEG_NONE,
7006             args->share_access, args->share_deny, resp, 1);
7007 }
7008 
7009 /*ARGSUSED*/
7010 static void
7011 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7012     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7013 {
7014         /*
7015          * Lookup the pathname, it must already exist since this file
7016          * was delegated.
7017          *
7018          * Find the file and state info for this vp and open owner pair.
7019          *      check that they are in fact delegated.
7020          *      check that the state access and deny modes are the same.
7021          *
7022          * Return the delgation possibly seting the recall flag.
7023          */
7024         rfs4_file_t *fp;
7025         rfs4_state_t *sp;
7026         bool_t create = FALSE;
7027         bool_t dcreate = FALSE;
7028         rfs4_deleg_state_t *dsp;
7029         nfsace4 *ace;
7030 
7031         /* Note we ignore oflags */
7032         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7033             req, cs, args->share_access, &resp->cinfo);
7034 
7035         if (resp->status != NFS4_OK) {
7036                 return;
7037         }
7038 
7039         /* get the file struct and hold a lock on it during initial open */
7040         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7041         if (fp == NULL) {
7042                 resp->status = NFS4ERR_RESOURCE;
7043                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7044                 return;
7045         }
7046 
7047         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7048         if (sp == NULL) {
7049                 resp->status = NFS4ERR_SERVERFAULT;
7050                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7051                 rw_exit(&fp->rf_file_rwlock);
7052                 rfs4_file_rele(fp);
7053                 return;
7054         }
7055 
7056         rfs4_dbe_lock(sp->rs_dbe);
7057         rfs4_dbe_lock(fp->rf_dbe);
7058         if (args->share_access != sp->rs_share_access ||
7059             args->share_deny != sp->rs_share_deny ||
7060             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7061                 NFS4_DEBUG(rfs4_debug,
7062                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7063                 rfs4_dbe_unlock(fp->rf_dbe);
7064                 rfs4_dbe_unlock(sp->rs_dbe);
7065                 rfs4_file_rele(fp);
7066                 rfs4_state_rele(sp);
7067                 resp->status = NFS4ERR_SERVERFAULT;
7068                 return;
7069         }
7070         rfs4_dbe_unlock(fp->rf_dbe);
7071         rfs4_dbe_unlock(sp->rs_dbe);
7072 
7073         dsp = rfs4_finddeleg(sp, &dcreate);
7074         if (dsp == NULL) {
7075                 rfs4_state_rele(sp);
7076                 rfs4_file_rele(fp);
7077                 resp->status = NFS4ERR_SERVERFAULT;
7078                 return;
7079         }
7080 
7081         next_stateid(&sp->rs_stateid);
7082 
7083         resp->stateid = sp->rs_stateid.stateid;
7084 
7085         resp->delegation.delegation_type = dsp->rds_dtype;
7086 
7087         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7088                 open_read_delegation4 *rv =
7089                     &resp->delegation.open_delegation4_u.read;
7090 
7091                 rv->stateid = dsp->rds_delegid.stateid;
7092                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7093                 ace = &rv->permissions;
7094         } else {
7095                 open_write_delegation4 *rv =
7096                     &resp->delegation.open_delegation4_u.write;
7097 
7098                 rv->stateid = dsp->rds_delegid.stateid;
7099                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7100                 ace = &rv->permissions;
7101                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7102                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7103         }
7104 
7105         /* XXX For now */
7106         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7107         ace->flag = 0;
7108         ace->access_mask = 0;
7109         ace->who.utf8string_len = 0;
7110         ace->who.utf8string_val = 0;
7111 
7112         rfs4_deleg_state_rele(dsp);
7113         rfs4_state_rele(sp);
7114         rfs4_file_rele(fp);
7115 }
7116 
/*
 * Result of a sequence id check, as returned by rfs4_check_seqid() and
 * its per-owner wrappers.
 */
typedef enum {
        /* request carries the next expected sequence id */
        NFS4_CHKSEQ_OKAY = 0,
        /* same sequence id and same operation as the cached reply */
        NFS4_CHKSEQ_REPLAY = 1,
        /* any other sequence id, or a repeated id with a different op */
        NFS4_CHKSEQ_BAD = 2
} rfs4_chkseq_t;
7122 
7123 /*
7124  * Generic function for sequence number checks.
7125  */
7126 static rfs4_chkseq_t
7127 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7128     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7129 {
7130         /* Same sequence ids and matching operations? */
7131         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7132                 if (copyres == TRUE) {
7133                         rfs4_free_reply(resop);
7134                         rfs4_copy_reply(resop, lastop);
7135                 }
7136                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7137                     "Replayed SEQID %d\n", seqid));
7138                 return (NFS4_CHKSEQ_REPLAY);
7139         }
7140 
7141         /* If the incoming sequence is not the next expected then it is bad */
7142         if (rqst_seq != seqid + 1) {
7143                 if (rqst_seq == seqid) {
7144                         NFS4_DEBUG(rfs4_debug,
7145                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7146                             "but last op was %d current op is %d\n",
7147                             lastop->resop, resop->resop));
7148                         return (NFS4_CHKSEQ_BAD);
7149                 }
7150                 NFS4_DEBUG(rfs4_debug,
7151                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7152                     rqst_seq, seqid));
7153                 return (NFS4_CHKSEQ_BAD);
7154         }
7155 
7156         /* Everything okay -- next expected */
7157         return (NFS4_CHKSEQ_OKAY);
7158 }
7159 
7160 
7161 static rfs4_chkseq_t
7162 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7163 {
7164         rfs4_chkseq_t rc;
7165 
7166         rfs4_dbe_lock(op->ro_dbe);
7167         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7168             TRUE);
7169         rfs4_dbe_unlock(op->ro_dbe);
7170 
7171         if (rc == NFS4_CHKSEQ_OKAY)
7172                 rfs4_update_lease(op->ro_client);
7173 
7174         return (rc);
7175 }
7176 
7177 static rfs4_chkseq_t
7178 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7179 {
7180         rfs4_chkseq_t rc;
7181 
7182         rfs4_dbe_lock(op->ro_dbe);
7183         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7184             olo_seqid, resop, FALSE);
7185         rfs4_dbe_unlock(op->ro_dbe);
7186 
7187         return (rc);
7188 }
7189 
7190 static rfs4_chkseq_t
7191 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7192 {
7193         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7194 
7195         rfs4_dbe_lock(lsp->rls_dbe);
7196         if (!lsp->rls_skip_seqid_check)
7197                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7198                     resop, TRUE);
7199         rfs4_dbe_unlock(lsp->rls_dbe);
7200 
7201         return (rc);
7202 }
7203 
7204 static void
7205 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7206     struct svc_req *req, struct compound_state *cs)
7207 {
7208         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7209         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7210         open_owner4 *owner = &args->owner;
7211         open_claim_type4 claim = args->claim;
7212         rfs4_client_t *cp;
7213         rfs4_openowner_t *oo;
7214         bool_t create;
7215         bool_t replay = FALSE;
7216         int can_reclaim;
7217 
7218         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7219             OPEN4args *, args);
7220 
7221         if (cs->vp == NULL) {
7222                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7223                 goto end;
7224         }
7225 
7226         /*
7227          * Need to check clientid and lease expiration first based on
7228          * error ordering and incrementing sequence id.
7229          */
7230         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7231         if (cp == NULL) {
7232                 *cs->statusp = resp->status =
7233                     rfs4_check_clientid(&owner->clientid, 0);
7234                 goto end;
7235         }
7236 
7237         if (rfs4_lease_expired(cp)) {
7238                 rfs4_client_close(cp);
7239                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7240                 goto end;
7241         }
7242         can_reclaim = cp->rc_can_reclaim;
7243 
7244         /*
7245          * Find the open_owner for use from this point forward.  Take
7246          * care in updating the sequence id based on the type of error
7247          * being returned.
7248          */
7249 retry:
7250         create = TRUE;
7251         oo = rfs4_findopenowner(owner, &create, args->seqid);
7252         if (oo == NULL) {
7253                 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7254                 rfs4_client_rele(cp);
7255                 goto end;
7256         }
7257 
7258         /* Hold off access to the sequence space while the open is done */
7259         rfs4_sw_enter(&oo->ro_sw);
7260 
7261         /*
7262          * If the open_owner existed before at the server, then check
7263          * the sequence id.
7264          */
7265         if (!create && !oo->ro_postpone_confirm) {
7266                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7267                 case NFS4_CHKSEQ_BAD:
7268                         if ((args->seqid > oo->ro_open_seqid) &&
7269                             oo->ro_need_confirm) {
7270                                 rfs4_free_opens(oo, TRUE, FALSE);
7271                                 rfs4_sw_exit(&oo->ro_sw);
7272                                 rfs4_openowner_rele(oo);
7273                                 goto retry;
7274                         }
7275                         resp->status = NFS4ERR_BAD_SEQID;
7276                         goto out;
7277                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7278                         replay = TRUE;
7279                         goto out;
7280                 default:
7281                         break;
7282                 }
7283 
7284                 /*
7285                  * Sequence was ok and open owner exists
7286                  * check to see if we have yet to see an
7287                  * open_confirm.
7288                  */
7289                 if (oo->ro_need_confirm) {
7290                         rfs4_free_opens(oo, TRUE, FALSE);
7291                         rfs4_sw_exit(&oo->ro_sw);
7292                         rfs4_openowner_rele(oo);
7293                         goto retry;
7294                 }
7295         }
7296         /* Grace only applies to regular-type OPENs */
7297         if (rfs4_clnt_in_grace(cp) &&
7298             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7299                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7300                 goto out;
7301         }
7302 
7303         /*
7304          * If previous state at the server existed then can_reclaim
7305          * will be set. If not reply NFS4ERR_NO_GRACE to the
7306          * client.
7307          */
7308         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7309                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7310                 goto out;
7311         }
7312 
7313 
7314         /*
7315          * Reject the open if the client has missed the grace period
7316          */
7317         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7318                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7319                 goto out;
7320         }
7321 
7322         /* Couple of up-front bookkeeping items */
7323         if (oo->ro_need_confirm) {
7324                 /*
7325                  * If this is a reclaim OPEN then we should not ask
7326                  * for a confirmation of the open_owner per the
7327                  * protocol specification.
7328                  */
7329                 if (claim == CLAIM_PREVIOUS)
7330                         oo->ro_need_confirm = FALSE;
7331                 else
7332                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7333         }
7334         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7335 
7336         /*
7337          * If there is an unshared filesystem mounted on this vnode,
7338          * do not allow to open/create in this directory.
7339          */
7340         if (vn_ismntpt(cs->vp)) {
7341                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7342                 goto out;
7343         }
7344 
7345         /*
7346          * access must READ, WRITE, or BOTH.  No access is invalid.
7347          * deny can be READ, WRITE, BOTH, or NONE.
7348          * bits not defined for access/deny are invalid.
7349          */
7350         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7351             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7352             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7353                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7354                 goto out;
7355         }
7356 
7357 
7358         /*
7359          * make sure attrset is zero before response is built.
7360          */
7361         resp->attrset = 0;
7362 
7363         switch (claim) {
7364         case CLAIM_NULL:
7365                 rfs4_do_opennull(cs, req, args, oo, resp);
7366                 break;
7367         case CLAIM_PREVIOUS:
7368                 rfs4_do_openprev(cs, req, args, oo, resp);
7369                 break;
7370         case CLAIM_DELEGATE_CUR:
7371                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7372                 break;
7373         case CLAIM_DELEGATE_PREV:
7374                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7375                 break;
7376         default:
7377                 resp->status = NFS4ERR_INVAL;
7378                 break;
7379         }
7380 
7381 out:
7382         rfs4_client_rele(cp);
7383 
7384         /* Catch sequence id handling here to make it a little easier */
7385         switch (resp->status) {
7386         case NFS4ERR_BADXDR:
7387         case NFS4ERR_BAD_SEQID:
7388         case NFS4ERR_BAD_STATEID:
7389         case NFS4ERR_NOFILEHANDLE:
7390         case NFS4ERR_RESOURCE:
7391         case NFS4ERR_STALE_CLIENTID:
7392         case NFS4ERR_STALE_STATEID:
7393                 /*
7394                  * The protocol states that if any of these errors are
7395                  * being returned, the sequence id should not be
7396                  * incremented.  Any other return requires an
7397                  * increment.
7398                  */
7399                 break;
7400         default:
7401                 /* Always update the lease in this case */
7402                 rfs4_update_lease(oo->ro_client);
7403 
7404                 /* Regular response - copy the result */
7405                 if (!replay)
7406                         rfs4_update_open_resp(oo, resop, &cs->fh);
7407 
7408                 /*
7409                  * REPLAY case: Only if the previous response was OK
7410                  * do we copy the filehandle.  If not OK, no
7411                  * filehandle to copy.
7412                  */
7413                 if (replay == TRUE &&
7414                     resp->status == NFS4_OK &&
7415                     oo->ro_reply_fh.nfs_fh4_val) {
7416                         /*
7417                          * If this is a replay, we must restore the
7418                          * current filehandle/vp to that of what was
7419                          * returned originally.  Try our best to do
7420                          * it.
7421                          */
7422                         nfs_fh4_fmt_t *fh_fmtp =
7423                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7424 
7425                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7426                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7427 
7428                         if (cs->exi == NULL) {
7429                                 resp->status = NFS4ERR_STALE;
7430                                 goto finish;
7431                         }
7432 
7433                         VN_RELE(cs->vp);
7434 
7435                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7436                             &resp->status);
7437 
7438                         if (cs->vp == NULL)
7439                                 goto finish;
7440 
7441                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7442                 }
7443 
7444                 /*
7445                  * If this was a replay, no need to update the
7446                  * sequence id. If the open_owner was not created on
7447                  * this pass, then update.  The first use of an
7448                  * open_owner will not bump the sequence id.
7449                  */
7450                 if (replay == FALSE && !create)
7451                         rfs4_update_open_sequence(oo);
7452                 /*
7453                  * If the client is receiving an error and the
7454                  * open_owner needs to be confirmed, there is no way
7455                  * to notify the client of this fact ignoring the fact
7456                  * that the server has no method of returning a
7457                  * stateid to confirm.  Therefore, the server needs to
7458                  * mark this open_owner in a way as to avoid the
7459                  * sequence id checking the next time the client uses
7460                  * this open_owner.
7461                  */
7462                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7463                         oo->ro_postpone_confirm = TRUE;
7464                 /*
7465                  * If OK response then clear the postpone flag and
7466                  * reset the sequence id to keep in sync with the
7467                  * client.
7468                  */
7469                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7470                         oo->ro_postpone_confirm = FALSE;
7471                         oo->ro_open_seqid = args->seqid;
7472                 }
7473                 break;
7474         }
7475 
7476 finish:
7477         *cs->statusp = resp->status;
7478 
7479         rfs4_sw_exit(&oo->ro_sw);
7480         rfs4_openowner_rele(oo);
7481 
7482 end:
7483         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7484             OPEN4res *, resp);
7485 }
7486 
7487 /*ARGSUSED*/
7488 void
7489 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7490     struct svc_req *req, struct compound_state *cs)
7491 {
7492         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7493         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7494         rfs4_state_t *sp;
7495         nfsstat4 status;
7496 
7497         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7498             OPEN_CONFIRM4args *, args);
7499 
7500         if (cs->vp == NULL) {
7501                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7502                 goto out;
7503         }
7504 
7505         if (cs->vp->v_type != VREG) {
7506                 *cs->statusp = resp->status =
7507                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7508                 return;
7509         }
7510 
7511         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7512         if (status != NFS4_OK) {
7513                 *cs->statusp = resp->status = status;
7514                 goto out;
7515         }
7516 
7517         /* Ensure specified filehandle matches */
7518         if (cs->vp != sp->rs_finfo->rf_vp) {
7519                 rfs4_state_rele(sp);
7520                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7521                 goto out;
7522         }
7523 
7524         /* hold off other access to open_owner while we tinker */
7525         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7526 
7527         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7528         case NFS4_CHECK_STATEID_OKAY:
7529                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7530                     resop) != 0) {
7531                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7532                         break;
7533                 }
7534                 /*
7535                  * If it is the appropriate stateid and determined to
7536                  * be "OKAY" then this means that the stateid does not
7537                  * need to be confirmed and the client is in error for
7538                  * sending an OPEN_CONFIRM.
7539                  */
7540                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7541                 break;
7542         case NFS4_CHECK_STATEID_OLD:
7543                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7544                 break;
7545         case NFS4_CHECK_STATEID_BAD:
7546                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7547                 break;
7548         case NFS4_CHECK_STATEID_EXPIRED:
7549                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7550                 break;
7551         case NFS4_CHECK_STATEID_CLOSED:
7552                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7553                 break;
7554         case NFS4_CHECK_STATEID_REPLAY:
7555                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7556                     resop)) {
7557                 case NFS4_CHKSEQ_OKAY:
7558                         /*
7559                          * This is replayed stateid; if seqid matches
7560                          * next expected, then client is using wrong seqid.
7561                          */
7562                         /* fall through */
7563                 case NFS4_CHKSEQ_BAD:
7564                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7565                         break;
7566                 case NFS4_CHKSEQ_REPLAY:
7567                         /*
7568                          * Note this case is the duplicate case so
7569                          * resp->status is already set.
7570                          */
7571                         *cs->statusp = resp->status;
7572                         rfs4_update_lease(sp->rs_owner->ro_client);
7573                         break;
7574                 }
7575                 break;
7576         case NFS4_CHECK_STATEID_UNCONFIRMED:
7577                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7578                     resop) != NFS4_CHKSEQ_OKAY) {
7579                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7580                         break;
7581                 }
7582                 *cs->statusp = resp->status = NFS4_OK;
7583 
7584                 next_stateid(&sp->rs_stateid);
7585                 resp->open_stateid = sp->rs_stateid.stateid;
7586                 sp->rs_owner->ro_need_confirm = FALSE;
7587                 rfs4_update_lease(sp->rs_owner->ro_client);
7588                 rfs4_update_open_sequence(sp->rs_owner);
7589                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7590                 break;
7591         default:
7592                 ASSERT(FALSE);
7593                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7594                 break;
7595         }
7596         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7597         rfs4_state_rele(sp);
7598 
7599 out:
7600         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7601             OPEN_CONFIRM4res *, resp);
7602 }
7603 
7604 /*ARGSUSED*/
7605 void
7606 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7607     struct svc_req *req, struct compound_state *cs)
7608 {
7609         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7610         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7611         uint32_t access = args->share_access;
7612         uint32_t deny = args->share_deny;
7613         nfsstat4 status;
7614         rfs4_state_t *sp;
7615         rfs4_file_t *fp;
7616         int fflags = 0;
7617 
7618         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7619             OPEN_DOWNGRADE4args *, args);
7620 
7621         if (cs->vp == NULL) {
7622                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7623                 goto out;
7624         }
7625 
7626         if (cs->vp->v_type != VREG) {
7627                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7628                 return;
7629         }
7630 
7631         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7632         if (status != NFS4_OK) {
7633                 *cs->statusp = resp->status = status;
7634                 goto out;
7635         }
7636 
7637         /* Ensure specified filehandle matches */
7638         if (cs->vp != sp->rs_finfo->rf_vp) {
7639                 rfs4_state_rele(sp);
7640                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7641                 goto out;
7642         }
7643 
7644         /* hold off other access to open_owner while we tinker */
7645         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7646 
7647         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7648         case NFS4_CHECK_STATEID_OKAY:
7649                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7650                     resop) != NFS4_CHKSEQ_OKAY) {
7651                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7652                         goto end;
7653                 }
7654                 break;
7655         case NFS4_CHECK_STATEID_OLD:
7656                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7657                 goto end;
7658         case NFS4_CHECK_STATEID_BAD:
7659                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7660                 goto end;
7661         case NFS4_CHECK_STATEID_EXPIRED:
7662                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7663                 goto end;
7664         case NFS4_CHECK_STATEID_CLOSED:
7665                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7666                 goto end;
7667         case NFS4_CHECK_STATEID_UNCONFIRMED:
7668                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7669                 goto end;
7670         case NFS4_CHECK_STATEID_REPLAY:
7671                 /* Check the sequence id for the open owner */
7672                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7673                     resop)) {
7674                 case NFS4_CHKSEQ_OKAY:
7675                         /*
7676                          * This is replayed stateid; if seqid matches
7677                          * next expected, then client is using wrong seqid.
7678                          */
7679                         /* fall through */
7680                 case NFS4_CHKSEQ_BAD:
7681                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7682                         goto end;
7683                 case NFS4_CHKSEQ_REPLAY:
7684                         /*
7685                          * Note this case is the duplicate case so
7686                          * resp->status is already set.
7687                          */
7688                         *cs->statusp = resp->status;
7689                         rfs4_update_lease(sp->rs_owner->ro_client);
7690                         goto end;
7691                 }
7692                 break;
7693         default:
7694                 ASSERT(FALSE);
7695                 break;
7696         }
7697 
7698         rfs4_dbe_lock(sp->rs_dbe);
7699         /*
7700          * Check that the new access modes and deny modes are valid.
7701          * Check that no invalid bits are set.
7702          */
7703         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7704             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7705                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7706                 rfs4_update_open_sequence(sp->rs_owner);
7707                 rfs4_dbe_unlock(sp->rs_dbe);
7708                 goto end;
7709         }
7710 
7711         /*
7712          * The new modes must be a subset of the current modes and
7713          * the access must specify at least one mode. To test that
7714          * the new mode is a subset of the current modes we bitwise
7715          * AND them together and check that the result equals the new
7716          * mode. For example:
7717          * New mode, access == R and current mode, sp->rs_open_access  == RW
7718          * access & sp->rs_open_access == R == access, so the new access mode
7719          * is valid. Consider access == RW, sp->rs_open_access = R
7720          * access & sp->rs_open_access == R != access, so the new access mode
7721          * is invalid.
7722          */
7723         if ((access & sp->rs_open_access) != access ||
7724             (deny & sp->rs_open_deny) != deny ||
7725             (access &
7726             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7727                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7728                 rfs4_update_open_sequence(sp->rs_owner);
7729                 rfs4_dbe_unlock(sp->rs_dbe);
7730                 goto end;
7731         }
7732 
7733         /*
7734          * Release any share locks associated with this stateID.
7735          * Strictly speaking, this violates the spec because the
7736          * spec effectively requires that open downgrade be atomic.
7737          * At present, fs_shrlock does not have this capability.
7738          */
7739         (void) rfs4_unshare(sp);
7740 
7741         status = rfs4_share(sp, access, deny);
7742         if (status != NFS4_OK) {
7743                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7744                 rfs4_update_open_sequence(sp->rs_owner);
7745                 rfs4_dbe_unlock(sp->rs_dbe);
7746                 goto end;
7747         }
7748 
7749         fp = sp->rs_finfo;
7750         rfs4_dbe_lock(fp->rf_dbe);
7751 
7752         /*
7753          * If the current mode has deny read and the new mode
7754          * does not, decrement the number of deny read mode bits
7755          * and if it goes to zero turn off the deny read bit
7756          * on the file.
7757          */
7758         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7759             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7760                 fp->rf_deny_read--;
7761                 if (fp->rf_deny_read == 0)
7762                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7763         }
7764 
7765         /*
7766          * If the current mode has deny write and the new mode
7767          * does not, decrement the number of deny write mode bits
7768          * and if it goes to zero turn off the deny write bit
7769          * on the file.
7770          */
7771         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7772             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7773                 fp->rf_deny_write--;
7774                 if (fp->rf_deny_write == 0)
7775                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7776         }
7777 
7778         /*
7779          * If the current mode has access read and the new mode
7780          * does not, decrement the number of access read mode bits
7781          * and if it goes to zero turn off the access read bit
7782          * on the file.  set fflags to FREAD for the call to
7783          * vn_open_downgrade().
7784          */
7785         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7786             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7787                 fp->rf_access_read--;
7788                 if (fp->rf_access_read == 0)
7789                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7790                 fflags |= FREAD;
7791         }
7792 
7793         /*
7794          * If the current mode has access write and the new mode
7795          * does not, decrement the number of access write mode bits
7796          * and if it goes to zero turn off the access write bit
7797          * on the file.  set fflags to FWRITE for the call to
7798          * vn_open_downgrade().
7799          */
7800         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7801             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7802                 fp->rf_access_write--;
7803                 if (fp->rf_access_write == 0)
7804                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7805                 fflags |= FWRITE;
7806         }
7807 
7808         /* Check that the file is still accessible */
7809         ASSERT(fp->rf_share_access);
7810 
7811         rfs4_dbe_unlock(fp->rf_dbe);
7812 
7813         /* now set the new open access and deny modes */
7814         sp->rs_open_access = access;
7815         sp->rs_open_deny = deny;
7816 
7817         /*
7818          * we successfully downgraded the share lock, now we need to downgrade
7819          * the open. it is possible that the downgrade was only for a deny
7820          * mode and we have nothing else to do.
7821          */
7822         if ((fflags & (FREAD|FWRITE)) != 0)
7823                 vn_open_downgrade(cs->vp, fflags);
7824 
7825         /* Update the stateid */
7826         next_stateid(&sp->rs_stateid);
7827         resp->open_stateid = sp->rs_stateid.stateid;
7828 
7829         rfs4_dbe_unlock(sp->rs_dbe);
7830 
7831         *cs->statusp = resp->status = NFS4_OK;
7832         /* Update the lease */
7833         rfs4_update_lease(sp->rs_owner->ro_client);
7834         /* And the sequence */
7835         rfs4_update_open_sequence(sp->rs_owner);
7836         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7837 
7838 end:
7839         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7840         rfs4_state_rele(sp);
7841 out:
7842         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7843             OPEN_DOWNGRADE4res *, resp);
7844 }
7845 
/*
 * Search the first n bytes of the buffer s1 for the NUL-terminated
 * string s2 (the terminator itself is not part of the match).  The
 * buffer is treated as raw bytes, so it need not be NUL-terminated and
 * may contain embedded NULs.
 *
 * Returns a pointer to the first occurrence within s1, or NULL when s2
 * does not occur in the first n bytes.  An empty s2 matches at s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t patlen = strlen(s2);
        char *cur = (char *)s1;
        size_t remaining;

        /* Stop as soon as fewer than patlen bytes are left to examine */
        for (remaining = n; remaining >= patlen; remaining--, cur++) {
                if (bcmp(cur, s2, patlen) == 0)
                        return (cur);
        }

        return (NULL);
}
7861 
7862 /*
7863  * The logic behind this function is detailed in the NFSv4 RFC in the
7864  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7865  * that section for explicit guidance to server behavior for
7866  * SETCLIENTID.
7867  */
7868 void
7869 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7870     struct svc_req *req, struct compound_state *cs)
7871 {
7872         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7873         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7874         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7875         rfs4_clntip_t *ci;
7876         bool_t create;
7877         char *addr, *netid;
7878         int len;
7879 
7880         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7881             SETCLIENTID4args *, args);
7882 retry:
7883         newcp = cp_confirmed = cp_unconfirmed = NULL;
7884 
7885         /*
7886          * Save the caller's IP address
7887          */
7888         args->client.cl_addr =
7889             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7890 
7891         /*
7892          * Record if it is a Solaris client that cannot handle referrals.
7893          */
7894         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
7895             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
7896                 /* Add a "yes, it's downrev" record */
7897                 create = TRUE;
7898                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7899                 ASSERT(ci != NULL);
7900                 rfs4_dbe_rele(ci->ri_dbe);
7901         } else {
7902                 /* Remove any previous record */
7903                 rfs4_invalidate_clntip(args->client.cl_addr);
7904         }
7905 
7906         /*
7907          * In search of an EXISTING client matching the incoming
7908          * request to establish a new client identifier at the server
7909          */
7910         create = TRUE;
7911         cp = rfs4_findclient(&args->client, &create, NULL);
7912 
7913         /* Should never happen */
7914         ASSERT(cp != NULL);
7915 
7916         if (cp == NULL) {
7917                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7918                 goto out;
7919         }
7920 
7921         /*
7922          * Easiest case. Client identifier is newly created and is
7923          * unconfirmed.  Also note that for this case, no other
7924          * entries exist for the client identifier.  Nothing else to
7925          * check.  Just setup the response and respond.
7926          */
7927         if (create) {
7928                 *cs->statusp = res->status = NFS4_OK;
7929                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
7930                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7931                     cp->rc_confirm_verf;
7932                 /* Setup callback information; CB_NULL confirmation later */
7933                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7934 
7935                 rfs4_client_rele(cp);
7936                 goto out;
7937         }
7938 
7939         /*
7940          * An existing, confirmed client may exist but it may not have
7941          * been active for at least one lease period.  If so, then
7942          * "close" the client and create a new client identifier
7943          */
7944         if (rfs4_lease_expired(cp)) {
7945                 rfs4_client_close(cp);
7946                 goto retry;
7947         }
7948 
7949         if (cp->rc_need_confirm == TRUE)
7950                 cp_unconfirmed = cp;
7951         else
7952                 cp_confirmed = cp;
7953 
7954         cp = NULL;
7955 
7956         /*
7957          * We have a confirmed client, now check for an
7958          * unconfimred entry
7959          */
7960         if (cp_confirmed) {
7961                 /* If creds don't match then client identifier is inuse */
7962                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
7963                         rfs4_cbinfo_t *cbp;
7964                         /*
7965                          * Some one else has established this client
7966                          * id. Try and say * who they are. We will use
7967                          * the call back address supplied by * the
7968                          * first client.
7969                          */
7970                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7971 
7972                         addr = netid = NULL;
7973 
7974                         cbp = &cp_confirmed->rc_cbinfo;
7975                         if (cbp->cb_callback.cb_location.r_addr &&
7976                             cbp->cb_callback.cb_location.r_netid) {
7977                                 cb_client4 *cbcp = &cbp->cb_callback;
7978 
7979                                 len = strlen(cbcp->cb_location.r_addr)+1;
7980                                 addr = kmem_alloc(len, KM_SLEEP);
7981                                 bcopy(cbcp->cb_location.r_addr, addr, len);
7982                                 len = strlen(cbcp->cb_location.r_netid)+1;
7983                                 netid = kmem_alloc(len, KM_SLEEP);
7984                                 bcopy(cbcp->cb_location.r_netid, netid, len);
7985                         }
7986 
7987                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
7988                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
7989 
7990                         rfs4_client_rele(cp_confirmed);
7991                 }
7992 
7993                 /*
7994                  * Confirmed, creds match, and verifier matches; must
7995                  * be an update of the callback info
7996                  */
7997                 if (cp_confirmed->rc_nfs_client.verifier ==
7998                     args->client.verifier) {
7999                         /* Setup callback information */
8000                         rfs4_client_setcb(cp_confirmed, &args->callback,
8001                             args->callback_ident);
8002 
8003                         /* everything okay -- move ahead */
8004                         *cs->statusp = res->status = NFS4_OK;
8005                         res->SETCLIENTID4res_u.resok4.clientid =
8006                             cp_confirmed->rc_clientid;
8007 
8008                         /* update the confirm_verifier and return it */
8009                         rfs4_client_scv_next(cp_confirmed);
8010                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8011                             cp_confirmed->rc_confirm_verf;
8012 
8013                         rfs4_client_rele(cp_confirmed);
8014                         goto out;
8015                 }
8016 
8017                 /*
8018                  * Creds match but the verifier doesn't.  Must search
8019                  * for an unconfirmed client that would be replaced by
8020                  * this request.
8021                  */
8022                 create = FALSE;
8023                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8024                     cp_confirmed);
8025         }
8026 
8027         /*
8028          * At this point, we have taken care of the brand new client
8029          * struct, INUSE case, update of an existing, and confirmed
8030          * client struct.
8031          */
8032 
8033         /*
8034          * check to see if things have changed while we originally
8035          * picked up the client struct.  If they have, then return and
8036          * retry the processing of this SETCLIENTID request.
8037          */
8038         if (cp_unconfirmed) {
8039                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8040                 if (!cp_unconfirmed->rc_need_confirm) {
8041                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8042                         rfs4_client_rele(cp_unconfirmed);
8043                         if (cp_confirmed)
8044                                 rfs4_client_rele(cp_confirmed);
8045                         goto retry;
8046                 }
8047                 /* do away with the old unconfirmed one */
8048                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8049                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8050                 rfs4_client_rele(cp_unconfirmed);
8051                 cp_unconfirmed = NULL;
8052         }
8053 
8054         /*
8055          * This search will temporarily hide the confirmed client
8056          * struct while a new client struct is created as the
8057          * unconfirmed one.
8058          */
8059         create = TRUE;
8060         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8061 
8062         ASSERT(newcp != NULL);
8063 
8064         if (newcp == NULL) {
8065                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8066                 rfs4_client_rele(cp_confirmed);
8067                 goto out;
8068         }
8069 
8070         /*
8071          * If one was not created, then a similar request must be in
8072          * process so release and start over with this one
8073          */
8074         if (create != TRUE) {
8075                 rfs4_client_rele(newcp);
8076                 if (cp_confirmed)
8077                         rfs4_client_rele(cp_confirmed);
8078                 goto retry;
8079         }
8080 
8081         *cs->statusp = res->status = NFS4_OK;
8082         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8083         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8084             newcp->rc_confirm_verf;
8085         /* Setup callback information; CB_NULL confirmation later */
8086         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8087 
8088         newcp->rc_cp_confirmed = cp_confirmed;
8089 
8090         rfs4_client_rele(newcp);
8091 
8092 out:
8093         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8094             SETCLIENTID4res *, res);
8095 }
8096 
8097 /*ARGSUSED*/
8098 void
8099 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8100     struct svc_req *req, struct compound_state *cs)
8101 {
8102         SETCLIENTID_CONFIRM4args *args =
8103             &argop->nfs_argop4_u.opsetclientid_confirm;
8104         SETCLIENTID_CONFIRM4res *res =
8105             &resop->nfs_resop4_u.opsetclientid_confirm;
8106         rfs4_client_t *cp, *cptoclose = NULL;
8107 
8108         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8109             struct compound_state *, cs,
8110             SETCLIENTID_CONFIRM4args *, args);
8111 
8112         *cs->statusp = res->status = NFS4_OK;
8113 
8114         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8115 
8116         if (cp == NULL) {
8117                 *cs->statusp = res->status =
8118                     rfs4_check_clientid(&args->clientid, 1);
8119                 goto out;
8120         }
8121 
8122         if (!creds_ok(cp, req, cs)) {
8123                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8124                 rfs4_client_rele(cp);
8125                 goto out;
8126         }
8127 
8128         /* If the verifier doesn't match, the record doesn't match */
8129         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8130                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8131                 rfs4_client_rele(cp);
8132                 goto out;
8133         }
8134 
8135         rfs4_dbe_lock(cp->rc_dbe);
8136         cp->rc_need_confirm = FALSE;
8137         if (cp->rc_cp_confirmed) {
8138                 cptoclose = cp->rc_cp_confirmed;
8139                 cptoclose->rc_ss_remove = 1;
8140                 cp->rc_cp_confirmed = NULL;
8141         }
8142 
8143         /*
8144          * Update the client's associated server instance, if it's changed
8145          * since the client was created.
8146          */
8147         if (rfs4_servinst(cp) != rfs4_cur_servinst)
8148                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8149 
8150         /*
8151          * Record clientid in stable storage.
8152          * Must be done after server instance has been assigned.
8153          */
8154         rfs4_ss_clid(cp);
8155 
8156         rfs4_dbe_unlock(cp->rc_dbe);
8157 
8158         if (cptoclose)
8159                 /* don't need to rele, client_close does it */
8160                 rfs4_client_close(cptoclose);
8161 
8162         /* If needed, initiate CB_NULL call for callback path */
8163         rfs4_deleg_cb_check(cp);
8164         rfs4_update_lease(cp);
8165 
8166         /*
8167          * Check to see if client can perform reclaims
8168          */
8169         rfs4_ss_chkclid(cp);
8170 
8171         rfs4_client_rele(cp);
8172 
8173 out:
8174         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8175             struct compound_state *, cs,
8176             SETCLIENTID_CONFIRM4 *, res);
8177 }
8178 
8179 
8180 /*ARGSUSED*/
8181 void
8182 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8183     struct svc_req *req, struct compound_state *cs)
8184 {
8185         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8186         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8187         rfs4_state_t *sp;
8188         nfsstat4 status;
8189 
8190         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8191             CLOSE4args *, args);
8192 
8193         if (cs->vp == NULL) {
8194                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8195                 goto out;
8196         }
8197 
8198         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8199         if (status != NFS4_OK) {
8200                 *cs->statusp = resp->status = status;
8201                 goto out;
8202         }
8203 
8204         /* Ensure specified filehandle matches */
8205         if (cs->vp != sp->rs_finfo->rf_vp) {
8206                 rfs4_state_rele(sp);
8207                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8208                 goto out;
8209         }
8210 
8211         /* hold off other access to open_owner while we tinker */
8212         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8213 
8214         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8215         case NFS4_CHECK_STATEID_OKAY:
8216                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8217                     resop) != NFS4_CHKSEQ_OKAY) {
8218                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8219                         goto end;
8220                 }
8221                 break;
8222         case NFS4_CHECK_STATEID_OLD:
8223                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8224                 goto end;
8225         case NFS4_CHECK_STATEID_BAD:
8226                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8227                 goto end;
8228         case NFS4_CHECK_STATEID_EXPIRED:
8229                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8230                 goto end;
8231         case NFS4_CHECK_STATEID_CLOSED:
8232                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8233                 goto end;
8234         case NFS4_CHECK_STATEID_UNCONFIRMED:
8235                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8236                 goto end;
8237         case NFS4_CHECK_STATEID_REPLAY:
8238                 /* Check the sequence id for the open owner */
8239                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8240                     resop)) {
8241                 case NFS4_CHKSEQ_OKAY:
8242                         /*
8243                          * This is replayed stateid; if seqid matches
8244                          * next expected, then client is using wrong seqid.
8245                          */
8246                         /* FALL THROUGH */
8247                 case NFS4_CHKSEQ_BAD:
8248                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8249                         goto end;
8250                 case NFS4_CHKSEQ_REPLAY:
8251                         /*
8252                          * Note this case is the duplicate case so
8253                          * resp->status is already set.
8254                          */
8255                         *cs->statusp = resp->status;
8256                         rfs4_update_lease(sp->rs_owner->ro_client);
8257                         goto end;
8258                 }
8259                 break;
8260         default:
8261                 ASSERT(FALSE);
8262                 break;
8263         }
8264 
8265         rfs4_dbe_lock(sp->rs_dbe);
8266 
8267         /* Update the stateid. */
8268         next_stateid(&sp->rs_stateid);
8269         resp->open_stateid = sp->rs_stateid.stateid;
8270 
8271         rfs4_dbe_unlock(sp->rs_dbe);
8272 
8273         rfs4_update_lease(sp->rs_owner->ro_client);
8274         rfs4_update_open_sequence(sp->rs_owner);
8275         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8276 
8277         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8278 
8279         *cs->statusp = resp->status = status;
8280 
8281 end:
8282         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8283         rfs4_state_rele(sp);
8284 out:
8285         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8286             CLOSE4res *, resp);
8287 }
8288 
8289 /*
8290  * Manage the counts on the file struct and close all file locks
8291  */
8292 /*ARGSUSED*/
8293 void
8294 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8295     bool_t close_of_client)
8296 {
8297         rfs4_file_t *fp = sp->rs_finfo;
8298         rfs4_lo_state_t *lsp;
8299         int fflags = 0;
8300 
8301         /*
8302          * If this call is part of the larger closing down of client
8303          * state then it is just easier to release all locks
8304          * associated with this client instead of going through each
8305          * individual file and cleaning locks there.
8306          */
8307         if (close_of_client) {
8308                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8309                     !list_is_empty(&sp->rs_lostatelist) &&
8310                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8311                         /* Is the PxFS kernel module loaded? */
8312                         if (lm_remove_file_locks != NULL) {
8313                                 int new_sysid;
8314 
8315                                 /* Encode the cluster nodeid in new sysid */
8316                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8317                                 lm_set_nlmid_flk(&new_sysid);
8318 
8319                                 /*
8320                                  * This PxFS routine removes file locks for a
8321                                  * client over all nodes of a cluster.
8322                                  */
8323                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8324                                     "lm_remove_file_locks(sysid=0x%x)\n",
8325                                     new_sysid));
8326                                 (*lm_remove_file_locks)(new_sysid);
8327                         } else {
8328                                 struct flock64 flk;
8329 
8330                                 /* Release all locks for this client */
8331                                 flk.l_type = F_UNLKSYS;
8332                                 flk.l_whence = 0;
8333                                 flk.l_start = 0;
8334                                 flk.l_len = 0;
8335                                 flk.l_sysid =
8336                                     sp->rs_owner->ro_client->rc_sysidt;
8337                                 flk.l_pid = 0;
8338                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8339                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8340                                     (u_offset_t)0, NULL, CRED(), NULL);
8341                         }
8342 
8343                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8344                 }
8345         }
8346 
8347         /*
8348          * Release all locks on this file by this lock owner or at
8349          * least mark the locks as having been released
8350          */
8351         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8352             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8353                 lsp->rls_locks_cleaned = TRUE;
8354 
8355                 /* Was this already taken care of above? */
8356                 if (!close_of_client &&
8357                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8358                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8359                             lsp->rls_locker->rl_pid,
8360                             lsp->rls_locker->rl_client->rc_sysidt);
8361         }
8362 
8363         /*
8364          * Release any shrlocks associated with this open state ID.
8365          * This must be done before the rfs4_state gets marked closed.
8366          */
8367         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8368                 (void) rfs4_unshare(sp);
8369 
8370         if (sp->rs_open_access) {
8371                 rfs4_dbe_lock(fp->rf_dbe);
8372 
8373                 /*
8374                  * Decrement the count for each access and deny bit that this
8375                  * state has contributed to the file.
8376                  * If the file counts go to zero
8377                  * clear the appropriate bit in the appropriate mask.
8378                  */
8379                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8380                         fp->rf_access_read--;
8381                         fflags |= FREAD;
8382                         if (fp->rf_access_read == 0)
8383                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8384                 }
8385                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8386                         fp->rf_access_write--;
8387                         fflags |= FWRITE;
8388                         if (fp->rf_access_write == 0)
8389                                 fp->rf_share_access &=
8390                                     ~OPEN4_SHARE_ACCESS_WRITE;
8391                 }
8392                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8393                         fp->rf_deny_read--;
8394                         if (fp->rf_deny_read == 0)
8395                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8396                 }
8397                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8398                         fp->rf_deny_write--;
8399                         if (fp->rf_deny_write == 0)
8400                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8401                 }
8402 
8403                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8404 
8405                 rfs4_dbe_unlock(fp->rf_dbe);
8406 
8407                 sp->rs_open_access = 0;
8408                 sp->rs_open_deny = 0;
8409         }
8410 }
8411 
8412 /*
8413  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8414  */
8415 static nfsstat4
8416 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8417 {
8418         rfs4_lockowner_t *lo;
8419         rfs4_client_t *cp;
8420         uint32_t len;
8421 
8422         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8423         if (lo != NULL) {
8424                 cp = lo->rl_client;
8425                 if (rfs4_lease_expired(cp)) {
8426                         rfs4_lockowner_rele(lo);
8427                         rfs4_dbe_hold(cp->rc_dbe);
8428                         rfs4_client_close(cp);
8429                         return (NFS4ERR_EXPIRED);
8430                 }
8431                 dp->owner.clientid = lo->rl_owner.clientid;
8432                 len = lo->rl_owner.owner_len;
8433                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8434                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8435                 dp->owner.owner_len = len;
8436                 rfs4_lockowner_rele(lo);
8437                 goto finish;
8438         }
8439 
8440         /*
8441          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8442          * of the client id contain the boot time for a NFS4 lock. So we
8443          * fabricate and identity by setting clientid to the sysid, and
8444          * the lock owner to the pid.
8445          */
8446         dp->owner.clientid = flk->l_sysid;
8447         len = sizeof (pid_t);
8448         dp->owner.owner_len = len;
8449         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8450         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8451 finish:
8452         dp->offset = flk->l_start;
8453         dp->length = flk->l_len;
8454 
8455         if (flk->l_type == F_RDLCK)
8456                 dp->locktype = READ_LT;
8457         else if (flk->l_type == F_WRLCK)
8458                 dp->locktype = WRITE_LT;
8459         else
8460                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8461 
8462         return (NFS4_OK);
8463 }
8464 
8465 /*
8466  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8467  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8468  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8469  * for that (obviously); they are sending the LOCK requests with some delays
8470  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8471  * locking and delay implementation at the client side.
8472  *
8473  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8474  * fast retries on its own (the for loop below) in a hope the lock will be
8475  * available soon.  And if not, the client won't need to resend the LOCK
8476  * requests so fast to check the lock availability.  This basically saves some
8477  * network traffic and tries to make sure the client gets the lock ASAP.
8478  */
8479 static int
8480 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8481 {
8482         int error;
8483         struct flock64 flk;
8484         int i;
8485         clock_t delaytime;
8486         int cmd;
8487         int spin_cnt = 0;
8488 
8489         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8490 retry:
8491         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8492 
8493         for (i = 0; i < rfs4_maxlock_tries; i++) {
8494                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8495                 error = VOP_FRLOCK(vp, cmd,
8496                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8497 
8498                 if (error != EAGAIN && error != EACCES)
8499                         break;
8500 
8501                 if (i < rfs4_maxlock_tries - 1) {
8502                         delay(delaytime);
8503                         delaytime *= 2;
8504                 }
8505         }
8506 
8507         if (error == EAGAIN || error == EACCES) {
8508                 /* Get the owner of the lock */
8509                 flk = *flock;
8510                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8511                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8512                     NULL) == 0) {
8513                         /*
8514                          * There's a race inherent in the current VOP_FRLOCK
8515                          * design where:
8516                          * a: "other guy" takes a lock that conflicts with a
8517                          * lock we want
8518                          * b: we attempt to take our lock (non-blocking) and
8519                          * the attempt fails.
8520                          * c: "other guy" releases the conflicting lock
8521                          * d: we ask what lock conflicts with the lock we want,
8522                          * getting F_UNLCK (no lock blocks us)
8523                          *
8524                          * If we retry the non-blocking lock attempt in this
8525                          * case (restart at step 'b') there's some possibility
8526                          * that many such attempts might fail.  However a test
8527                          * designed to actually provoke this race shows that
8528                          * the vast majority of cases require no retry, and
8529                          * only a few took as many as three retries.  Here's
8530                          * the test outcome:
8531                          *
8532                          *         number of retries    how many times we needed
8533                          *                              that many retries
8534                          *         0                    79461
8535                          *         1                      862
8536                          *         2                       49
8537                          *         3                        5
8538                          *
8539                          * Given those empirical results, we arbitrarily limit
8540                          * the retry count to ten.
8541                          *
8542                          * If we actually make to ten retries and give up,
8543                          * nothing catastrophic happens, but we're unable to
8544                          * return the information about the conflicting lock to
8545                          * the NFS client.  That's an acceptable trade off vs.
8546                          * letting this retry loop run forever.
8547                          */
8548                         if (flk.l_type == F_UNLCK) {
8549                                 if (spin_cnt++ < 10) {
8550                                         /* No longer locked, retry */
8551                                         goto retry;
8552                                 }
8553                         } else {
8554                                 *flock = flk;
8555                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8556                                     F_GETLK, &flk);
8557                         }
8558                 }
8559         }
8560 
8561         return (error);
8562 }
8563 
8564 /*ARGSUSED*/
8565 static nfsstat4
8566 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8567     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8568 {
8569         nfsstat4 status;
8570         rfs4_lockowner_t *lo = lsp->rls_locker;
8571         rfs4_state_t *sp = lsp->rls_state;
8572         struct flock64 flock;
8573         int16_t ltype;
8574         int flag;
8575         int error;
8576         sysid_t sysid;
8577         LOCK4res *lres;
8578         vnode_t *vp;
8579 
8580         if (rfs4_lease_expired(lo->rl_client)) {
8581                 return (NFS4ERR_EXPIRED);
8582         }
8583 
8584         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8585                 return (status);
8586 
8587         /* Check for zero length. To lock to end of file use all ones for V4 */
8588         if (length == 0)
8589                 return (NFS4ERR_INVAL);
8590         else if (length == (length4)(~0))
8591                 length = 0;             /* Posix to end of file  */
8592 
8593 retry:
8594         rfs4_dbe_lock(sp->rs_dbe);
8595         if (sp->rs_closed == TRUE) {
8596                 rfs4_dbe_unlock(sp->rs_dbe);
8597                 return (NFS4ERR_OLD_STATEID);
8598         }
8599 
8600         if (resop->resop != OP_LOCKU) {
8601                 switch (locktype) {
8602                 case READ_LT:
8603                 case READW_LT:
8604                         if ((sp->rs_share_access
8605                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8606                                 rfs4_dbe_unlock(sp->rs_dbe);
8607 
8608                                 return (NFS4ERR_OPENMODE);
8609                         }
8610                         ltype = F_RDLCK;
8611                         break;
8612                 case WRITE_LT:
8613                 case WRITEW_LT:
8614                         if ((sp->rs_share_access
8615                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8616                                 rfs4_dbe_unlock(sp->rs_dbe);
8617 
8618                                 return (NFS4ERR_OPENMODE);
8619                         }
8620                         ltype = F_WRLCK;
8621                         break;
8622                 }
8623         } else
8624                 ltype = F_UNLCK;
8625 
8626         flock.l_type = ltype;
8627         flock.l_whence = 0;             /* SEEK_SET */
8628         flock.l_start = offset;
8629         flock.l_len = length;
8630         flock.l_sysid = sysid;
8631         flock.l_pid = lsp->rls_locker->rl_pid;
8632 
8633         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8634         if (flock.l_len < 0 || flock.l_start < 0) {
8635                 rfs4_dbe_unlock(sp->rs_dbe);
8636                 return (NFS4ERR_INVAL);
8637         }
8638 
8639         /*
8640          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8641          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8642          */
8643         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8644 
8645         vp = sp->rs_finfo->rf_vp;
8646         VN_HOLD(vp);
8647 
8648         /*
8649          * We need to unlock sp before we call the underlying filesystem to
8650          * acquire the file lock.
8651          */
8652         rfs4_dbe_unlock(sp->rs_dbe);
8653 
8654         error = setlock(vp, &flock, flag, cred);
8655 
8656         /*
8657          * Make sure the file is still open.  In a case the file was closed in
8658          * the meantime, clean the lock we acquired using the setlock() call
8659          * above, and return the appropriate error.
8660          */
8661         rfs4_dbe_lock(sp->rs_dbe);
8662         if (sp->rs_closed == TRUE) {
8663                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8664                 rfs4_dbe_unlock(sp->rs_dbe);
8665 
8666                 VN_RELE(vp);
8667 
8668                 return (NFS4ERR_OLD_STATEID);
8669         }
8670         rfs4_dbe_unlock(sp->rs_dbe);
8671 
8672         VN_RELE(vp);
8673 
8674         if (error == 0) {
8675                 rfs4_dbe_lock(lsp->rls_dbe);
8676                 next_stateid(&lsp->rls_lockid);
8677                 rfs4_dbe_unlock(lsp->rls_dbe);
8678         }
8679 
8680         /*
8681          * N.B. We map error values to nfsv4 errors. This is differrent
8682          * than puterrno4 routine.
8683          */
8684         switch (error) {
8685         case 0:
8686                 status = NFS4_OK;
8687                 break;
8688         case EAGAIN:
8689         case EACCES:            /* Old value */
8690                 /* Can only get here if op is OP_LOCK */
8691                 ASSERT(resop->resop == OP_LOCK);
8692                 lres = &resop->nfs_resop4_u.oplock;
8693                 status = NFS4ERR_DENIED;
8694                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8695                     == NFS4ERR_EXPIRED)
8696                         goto retry;
8697                 break;
8698         case ENOLCK:
8699                 status = NFS4ERR_DELAY;
8700                 break;
8701         case EOVERFLOW:
8702                 status = NFS4ERR_INVAL;
8703                 break;
8704         case EINVAL:
8705                 status = NFS4ERR_NOTSUPP;
8706                 break;
8707         default:
8708                 status = NFS4ERR_SERVERFAULT;
8709                 break;
8710         }
8711 
8712         return (status);
8713 }
8714 
8715 /*ARGSUSED*/
8716 void
8717 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8718     struct svc_req *req, struct compound_state *cs)
8719 {
8720         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8721         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8722         nfsstat4 status;
8723         stateid4 *stateid;
8724         rfs4_lockowner_t *lo;
8725         rfs4_client_t *cp;
8726         rfs4_state_t *sp = NULL;
8727         rfs4_lo_state_t *lsp = NULL;
8728         bool_t ls_sw_held = FALSE;
8729         bool_t create = TRUE;
8730         bool_t lcreate = TRUE;
8731         bool_t dup_lock = FALSE;
8732         int rc;
8733 
8734         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8735             LOCK4args *, args);
8736 
8737         if (cs->vp == NULL) {
8738                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8739                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8740                     cs, LOCK4res *, resp);
8741                 return;
8742         }
8743 
8744         if (args->locker.new_lock_owner) {
8745                 /* Create a new lockowner for this instance */
8746                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8747 
8748                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8749 
8750                 stateid = &olo->open_stateid;
8751                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8752                 if (status != NFS4_OK) {
8753                         NFS4_DEBUG(rfs4_debug,
8754                             (CE_NOTE, "Get state failed in lock %d", status));
8755                         *cs->statusp = resp->status = status;
8756                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8757                             cs, LOCK4res *, resp);
8758                         return;
8759                 }
8760 
8761                 /* Ensure specified filehandle matches */
8762                 if (cs->vp != sp->rs_finfo->rf_vp) {
8763                         rfs4_state_rele(sp);
8764                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8765                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8766                             cs, LOCK4res *, resp);
8767                         return;
8768                 }
8769 
8770                 /* hold off other access to open_owner while we tinker */
8771                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8772 
8773                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8774                 case NFS4_CHECK_STATEID_OLD:
8775                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8776                         goto end;
8777                 case NFS4_CHECK_STATEID_BAD:
8778                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8779                         goto end;
8780                 case NFS4_CHECK_STATEID_EXPIRED:
8781                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8782                         goto end;
8783                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8784                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8785                         goto end;
8786                 case NFS4_CHECK_STATEID_CLOSED:
8787                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8788                         goto end;
8789                 case NFS4_CHECK_STATEID_OKAY:
8790                 case NFS4_CHECK_STATEID_REPLAY:
8791                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8792                             sp->rs_owner, resop)) {
8793                         case NFS4_CHKSEQ_OKAY:
8794                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8795                                         break;
8796                                 /*
8797                                  * This is replayed stateid; if seqid
8798                                  * matches next expected, then client
8799                                  * is using wrong seqid.
8800                                  */
8801                                 /* FALLTHROUGH */
8802                         case NFS4_CHKSEQ_BAD:
8803                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8804                                 goto end;
8805                         case NFS4_CHKSEQ_REPLAY:
8806                                 /* This is a duplicate LOCK request */
8807                                 dup_lock = TRUE;
8808 
8809                                 /*
8810                                  * For a duplicate we do not want to
8811                                  * create a new lockowner as it should
8812                                  * already exist.
8813                                  * Turn off the lockowner create flag.
8814                                  */
8815                                 lcreate = FALSE;
8816                         }
8817                         break;
8818                 }
8819 
8820                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8821                 if (lo == NULL) {
8822                         NFS4_DEBUG(rfs4_debug,
8823                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8824                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8825                         goto end;
8826                 }
8827 
8828                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8829                 if (lsp == NULL) {
8830                         rfs4_update_lease(sp->rs_owner->ro_client);
8831                         /*
8832                          * Only update theh open_seqid if this is not
8833                          * a duplicate request
8834                          */
8835                         if (dup_lock == FALSE) {
8836                                 rfs4_update_open_sequence(sp->rs_owner);
8837                         }
8838 
8839                         NFS4_DEBUG(rfs4_debug,
8840                             (CE_NOTE, "rfs4_op_lock: no state"));
8841                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8842                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8843                         rfs4_lockowner_rele(lo);
8844                         goto end;
8845                 }
8846 
8847                 /*
8848                  * This is the new_lock_owner branch and the client is
8849                  * supposed to be associating a new lock_owner with
8850                  * the open file at this point.  If we find that a
8851                  * lock_owner/state association already exists and a
8852                  * successful LOCK request was returned to the client,
8853                  * an error is returned to the client since this is
8854                  * not appropriate.  The client should be using the
8855                  * existing lock_owner branch.
8856                  */
8857                 if (dup_lock == FALSE && create == FALSE) {
8858                         if (lsp->rls_lock_completed == TRUE) {
8859                                 *cs->statusp =
8860                                     resp->status = NFS4ERR_BAD_SEQID;
8861                                 rfs4_lockowner_rele(lo);
8862                                 goto end;
8863                         }
8864                 }
8865 
8866                 rfs4_update_lease(sp->rs_owner->ro_client);
8867 
8868                 /*
8869                  * Only update theh open_seqid if this is not
8870                  * a duplicate request
8871                  */
8872                 if (dup_lock == FALSE) {
8873                         rfs4_update_open_sequence(sp->rs_owner);
8874                 }
8875 
8876                 /*
8877                  * If this is a duplicate lock request, just copy the
8878                  * previously saved reply and return.
8879                  */
8880                 if (dup_lock == TRUE) {
8881                         /* verify that lock_seqid's match */
8882                         if (lsp->rls_seqid != olo->lock_seqid) {
8883                                 NFS4_DEBUG(rfs4_debug,
8884                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8885                                     "lsp->seqid=%d old->seqid=%d",
8886                                     lsp->rls_seqid, olo->lock_seqid));
8887                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8888                         } else {
8889                                 rfs4_copy_reply(resop, &lsp->rls_reply);
8890                                 /*
8891                                  * Make sure to copy the just
8892                                  * retrieved reply status into the
8893                                  * overall compound status
8894                                  */
8895                                 *cs->statusp = resp->status;
8896                         }
8897                         rfs4_lockowner_rele(lo);
8898                         goto end;
8899                 }
8900 
8901                 rfs4_dbe_lock(lsp->rls_dbe);
8902 
8903                 /* Make sure to update the lock sequence id */
8904                 lsp->rls_seqid = olo->lock_seqid;
8905 
8906                 NFS4_DEBUG(rfs4_debug,
8907                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8908 
8909                 /*
8910                  * This is used to signify the newly created lockowner
8911                  * stateid and its sequence number.  The checks for
8912                  * sequence number and increment don't occur on the
8913                  * very first lock request for a lockowner.
8914                  */
8915                 lsp->rls_skip_seqid_check = TRUE;
8916 
8917                 /* hold off other access to lsp while we tinker */
8918                 rfs4_sw_enter(&lsp->rls_sw);
8919                 ls_sw_held = TRUE;
8920 
8921                 rfs4_dbe_unlock(lsp->rls_dbe);
8922 
8923                 rfs4_lockowner_rele(lo);
8924         } else {
8925                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8926                 /* get lsp and hold the lock on the underlying file struct */
8927                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8928                     != NFS4_OK) {
8929                         *cs->statusp = resp->status = status;
8930                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8931                             cs, LOCK4res *, resp);
8932                         return;
8933                 }
8934                 create = FALSE; /* We didn't create lsp */
8935 
8936                 /* Ensure specified filehandle matches */
8937                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8938                         rfs4_lo_state_rele(lsp, TRUE);
8939                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8940                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8941                             cs, LOCK4res *, resp);
8942                         return;
8943                 }
8944 
8945                 /* hold off other access to lsp while we tinker */
8946                 rfs4_sw_enter(&lsp->rls_sw);
8947                 ls_sw_held = TRUE;
8948 
8949                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8950                 /*
8951                  * The stateid looks like it was okay (expected to be
8952                  * the next one)
8953                  */
8954                 case NFS4_CHECK_STATEID_OKAY:
8955                         /*
8956                          * The sequence id is now checked.  Determine
8957                          * if this is a replay or if it is in the
8958                          * expected (next) sequence.  In the case of a
8959                          * replay, there are two replay conditions
8960                          * that may occur.  The first is the normal
8961                          * condition where a LOCK is done with a
8962                          * NFS4_OK response and the stateid is
8963                          * updated.  That case is handled below when
8964                          * the stateid is identified as a REPLAY.  The
8965                          * second is the case where an error is
8966                          * returned, like NFS4ERR_DENIED, and the
8967                          * sequence number is updated but the stateid
8968                          * is not updated.  This second case is dealt
8969                          * with here.  So it may seem odd that the
8970                          * stateid is okay but the sequence id is a
8971                          * replay but it is okay.
8972                          */
8973                         switch (rfs4_check_lock_seqid(
8974                             args->locker.locker4_u.lock_owner.lock_seqid,
8975                             lsp, resop)) {
8976                         case NFS4_CHKSEQ_REPLAY:
8977                                 if (resp->status != NFS4_OK) {
8978                                         /*
8979                                          * Here is our replay and need
8980                                          * to verify that the last
8981                                          * response was an error.
8982                                          */
8983                                         *cs->statusp = resp->status;
8984                                         goto end;
8985                                 }
8986                                 /*
8987                                  * This is done since the sequence id
8988                                  * looked like a replay but it didn't
8989                                  * pass our check so a BAD_SEQID is
8990                                  * returned as a result.
8991                                  */
8992                                 /*FALLTHROUGH*/
8993                         case NFS4_CHKSEQ_BAD:
8994                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8995                                 goto end;
8996                         case NFS4_CHKSEQ_OKAY:
8997                                 /* Everything looks okay move ahead */
8998                                 break;
8999                         }
9000                         break;
9001                 case NFS4_CHECK_STATEID_OLD:
9002                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9003                         goto end;
9004                 case NFS4_CHECK_STATEID_BAD:
9005                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9006                         goto end;
9007                 case NFS4_CHECK_STATEID_EXPIRED:
9008                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9009                         goto end;
9010                 case NFS4_CHECK_STATEID_CLOSED:
9011                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9012                         goto end;
9013                 case NFS4_CHECK_STATEID_REPLAY:
9014                         switch (rfs4_check_lock_seqid(
9015                             args->locker.locker4_u.lock_owner.lock_seqid,
9016                             lsp, resop)) {
9017                         case NFS4_CHKSEQ_OKAY:
9018                                 /*
9019                                  * This is a replayed stateid; if
9020                                  * seqid matches the next expected,
9021                                  * then client is using wrong seqid.
9022                                  */
9023                         case NFS4_CHKSEQ_BAD:
9024                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9025                                 goto end;
9026                         case NFS4_CHKSEQ_REPLAY:
9027                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9028                                 *cs->statusp = status = resp->status;
9029                                 goto end;
9030                         }
9031                         break;
9032                 default:
9033                         ASSERT(FALSE);
9034                         break;
9035                 }
9036 
9037                 rfs4_update_lock_sequence(lsp);
9038                 rfs4_update_lease(lsp->rls_locker->rl_client);
9039         }
9040 
9041         /*
9042          * NFS4 only allows locking on regular files, so
9043          * verify type of object.
9044          */
9045         if (cs->vp->v_type != VREG) {
9046                 if (cs->vp->v_type == VDIR)
9047                         status = NFS4ERR_ISDIR;
9048                 else
9049                         status = NFS4ERR_INVAL;
9050                 goto out;
9051         }
9052 
9053         cp = lsp->rls_state->rs_owner->ro_client;
9054 
9055         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9056                 status = NFS4ERR_GRACE;
9057                 goto out;
9058         }
9059 
9060         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9061                 status = NFS4ERR_NO_GRACE;
9062                 goto out;
9063         }
9064 
9065         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9066                 status = NFS4ERR_NO_GRACE;
9067                 goto out;
9068         }
9069 
9070         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9071                 cs->deleg = TRUE;
9072 
9073         status = rfs4_do_lock(lsp, args->locktype,
9074             args->offset, args->length, cs->cr, resop);
9075 
9076 out:
9077         lsp->rls_skip_seqid_check = FALSE;
9078 
9079         *cs->statusp = resp->status = status;
9080 
9081         if (status == NFS4_OK) {
9082                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9083                 lsp->rls_lock_completed = TRUE;
9084         }
9085         /*
9086          * Only update the "OPEN" response here if this was a new
9087          * lock_owner
9088          */
9089         if (sp)
9090                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9091 
9092         rfs4_update_lock_resp(lsp, resop);
9093 
9094 end:
9095         if (lsp) {
9096                 if (ls_sw_held)
9097                         rfs4_sw_exit(&lsp->rls_sw);
9098                 /*
9099                  * If an sp obtained, then the lsp does not represent
9100                  * a lock on the file struct.
9101                  */
9102                 if (sp != NULL)
9103                         rfs4_lo_state_rele(lsp, FALSE);
9104                 else
9105                         rfs4_lo_state_rele(lsp, TRUE);
9106         }
9107         if (sp) {
9108                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9109                 rfs4_state_rele(sp);
9110         }
9111 
9112         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9113             LOCK4res *, resp);
9114 }
9115 
9116 /* free function for LOCK/LOCKT */
9117 static void
9118 lock_denied_free(nfs_resop4 *resop)
9119 {
9120         LOCK4denied *dp = NULL;
9121 
9122         switch (resop->resop) {
9123         case OP_LOCK:
9124                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9125                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9126                 break;
9127         case OP_LOCKT:
9128                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9129                         dp = &resop->nfs_resop4_u.oplockt.denied;
9130                 break;
9131         default:
9132                 break;
9133         }
9134 
9135         if (dp)
9136                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9137 }
9138 
9139 /*ARGSUSED*/
9140 void
9141 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9142     struct svc_req *req, struct compound_state *cs)
9143 {
9144         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9145         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9146         nfsstat4 status;
9147         stateid4 *stateid = &args->lock_stateid;
9148         rfs4_lo_state_t *lsp;
9149 
9150         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9151             LOCKU4args *, args);
9152 
9153         if (cs->vp == NULL) {
9154                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9155                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9156                     LOCKU4res *, resp);
9157                 return;
9158         }
9159 
9160         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9161                 *cs->statusp = resp->status = status;
9162                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9163                     LOCKU4res *, resp);
9164                 return;
9165         }
9166 
9167         /* Ensure specified filehandle matches */
9168         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9169                 rfs4_lo_state_rele(lsp, TRUE);
9170                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9171                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9172                     LOCKU4res *, resp);
9173                 return;
9174         }
9175 
9176         /* hold off other access to lsp while we tinker */
9177         rfs4_sw_enter(&lsp->rls_sw);
9178 
9179         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9180         case NFS4_CHECK_STATEID_OKAY:
9181                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9182                     != NFS4_CHKSEQ_OKAY) {
9183                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9184                         goto end;
9185                 }
9186                 break;
9187         case NFS4_CHECK_STATEID_OLD:
9188                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9189                 goto end;
9190         case NFS4_CHECK_STATEID_BAD:
9191                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9192                 goto end;
9193         case NFS4_CHECK_STATEID_EXPIRED:
9194                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9195                 goto end;
9196         case NFS4_CHECK_STATEID_CLOSED:
9197                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9198                 goto end;
9199         case NFS4_CHECK_STATEID_REPLAY:
9200                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9201                 case NFS4_CHKSEQ_OKAY:
9202                                 /*
9203                                  * This is a replayed stateid; if
9204                                  * seqid matches the next expected,
9205                                  * then client is using wrong seqid.
9206                                  */
9207                 case NFS4_CHKSEQ_BAD:
9208                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9209                         goto end;
9210                 case NFS4_CHKSEQ_REPLAY:
9211                         rfs4_update_lease(lsp->rls_locker->rl_client);
9212                         *cs->statusp = status = resp->status;
9213                         goto end;
9214                 }
9215                 break;
9216         default:
9217                 ASSERT(FALSE);
9218                 break;
9219         }
9220 
9221         rfs4_update_lock_sequence(lsp);
9222         rfs4_update_lease(lsp->rls_locker->rl_client);
9223 
9224         /*
9225          * NFS4 only allows locking on regular files, so
9226          * verify type of object.
9227          */
9228         if (cs->vp->v_type != VREG) {
9229                 if (cs->vp->v_type == VDIR)
9230                         status = NFS4ERR_ISDIR;
9231                 else
9232                         status = NFS4ERR_INVAL;
9233                 goto out;
9234         }
9235 
9236         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9237                 status = NFS4ERR_GRACE;
9238                 goto out;
9239         }
9240 
9241         status = rfs4_do_lock(lsp, args->locktype,
9242             args->offset, args->length, cs->cr, resop);
9243 
9244 out:
9245         *cs->statusp = resp->status = status;
9246 
9247         if (status == NFS4_OK)
9248                 resp->lock_stateid = lsp->rls_lockid.stateid;
9249 
9250         rfs4_update_lock_resp(lsp, resop);
9251 
9252 end:
9253         rfs4_sw_exit(&lsp->rls_sw);
9254         rfs4_lo_state_rele(lsp, TRUE);
9255 
9256         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9257             LOCKU4res *, resp);
9258 }
9259 
9260 /*
9261  * LOCKT is a best effort routine, the client can not be guaranteed that
9262  * the status return is still in effect by the time the reply is received.
9263  * They are numerous race conditions in this routine, but we are not required
9264  * and can not be accurate.
9265  */
9266 /*ARGSUSED*/
9267 void
9268 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9269     struct svc_req *req, struct compound_state *cs)
9270 {
9271         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9272         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9273         rfs4_lockowner_t *lo;
9274         rfs4_client_t *cp;
9275         bool_t create = FALSE;
9276         struct flock64 flk;
9277         int error;
9278         int flag = FREAD | FWRITE;
9279         int ltype;
9280         length4 posix_length;
9281         sysid_t sysid;
9282         pid_t pid;
9283 
9284         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9285             LOCKT4args *, args);
9286 
9287         if (cs->vp == NULL) {
9288                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9289                 goto out;
9290         }
9291 
9292         /*
9293          * NFS4 only allows locking on regular files, so
9294          * verify type of object.
9295          */
9296         if (cs->vp->v_type != VREG) {
9297                 if (cs->vp->v_type == VDIR)
9298                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9299                 else
9300                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9301                 goto out;
9302         }
9303 
9304         /*
9305          * Check out the clientid to ensure the server knows about it
9306          * so that we correctly inform the client of a server reboot.
9307          */
9308         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9309             == NULL) {
9310                 *cs->statusp = resp->status =
9311                     rfs4_check_clientid(&args->owner.clientid, 0);
9312                 goto out;
9313         }
9314         if (rfs4_lease_expired(cp)) {
9315                 rfs4_client_close(cp);
9316                 /*
9317                  * Protocol doesn't allow returning NFS4ERR_STALE as
9318                  * other operations do on this check so STALE_CLIENTID
9319                  * is returned instead
9320                  */
9321                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9322                 goto out;
9323         }
9324 
9325         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9326                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9327                 rfs4_client_rele(cp);
9328                 goto out;
9329         }
9330         rfs4_client_rele(cp);
9331 
9332         resp->status = NFS4_OK;
9333 
9334         switch (args->locktype) {
9335         case READ_LT:
9336         case READW_LT:
9337                 ltype = F_RDLCK;
9338                 break;
9339         case WRITE_LT:
9340         case WRITEW_LT:
9341                 ltype = F_WRLCK;
9342                 break;
9343         }
9344 
9345         posix_length = args->length;
9346         /* Check for zero length. To lock to end of file use all ones for V4 */
9347         if (posix_length == 0) {
9348                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9349                 goto out;
9350         } else if (posix_length == (length4)(~0)) {
9351                 posix_length = 0;       /* Posix to end of file  */
9352         }
9353 
9354         /* Find or create a lockowner */
9355         lo = rfs4_findlockowner(&args->owner, &create);
9356 
9357         if (lo) {
9358                 pid = lo->rl_pid;
9359                 if ((resp->status =
9360                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9361                         goto err;
9362         } else {
9363                 pid = 0;
9364                 sysid = lockt_sysid;
9365         }
9366 retry:
9367         flk.l_type = ltype;
9368         flk.l_whence = 0;               /* SEEK_SET */
9369         flk.l_start = args->offset;
9370         flk.l_len = posix_length;
9371         flk.l_sysid = sysid;
9372         flk.l_pid = pid;
9373         flag |= F_REMOTELOCK;
9374 
9375         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9376 
9377         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9378         if (flk.l_len < 0 || flk.l_start < 0) {
9379                 resp->status = NFS4ERR_INVAL;
9380                 goto err;
9381         }
9382         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9383             NULL, cs->cr, NULL);
9384 
9385         /*
9386          * N.B. We map error values to nfsv4 errors. This is differrent
9387          * than puterrno4 routine.
9388          */
9389         switch (error) {
9390         case 0:
9391                 if (flk.l_type == F_UNLCK)
9392                         resp->status = NFS4_OK;
9393                 else {
9394                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9395                                 goto retry;
9396                         resp->status = NFS4ERR_DENIED;
9397                 }
9398                 break;
9399         case EOVERFLOW:
9400                 resp->status = NFS4ERR_INVAL;
9401                 break;
9402         case EINVAL:
9403                 resp->status = NFS4ERR_NOTSUPP;
9404                 break;
9405         default:
9406                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9407                     error);
9408                 resp->status = NFS4ERR_SERVERFAULT;
9409                 break;
9410         }
9411 
9412 err:
9413         if (lo)
9414                 rfs4_lockowner_rele(lo);
9415         *cs->statusp = resp->status;
9416 out:
9417         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9418             LOCKT4res *, resp);
9419 }
9420 
9421 int
9422 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9423 {
9424         int err;
9425         int cmd;
9426         vnode_t *vp;
9427         struct shrlock shr;
9428         struct shr_locowner shr_loco;
9429         int fflags = 0;
9430 
9431         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9432         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9433 
9434         if (sp->rs_closed)
9435                 return (NFS4ERR_OLD_STATEID);
9436 
9437         vp = sp->rs_finfo->rf_vp;
9438         ASSERT(vp);
9439 
9440         shr.s_access = shr.s_deny = 0;
9441 
9442         if (access & OPEN4_SHARE_ACCESS_READ) {
9443                 fflags |= FREAD;
9444                 shr.s_access |= F_RDACC;
9445         }
9446         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9447                 fflags |= FWRITE;
9448                 shr.s_access |= F_WRACC;
9449         }
9450         ASSERT(shr.s_access);
9451 
9452         if (deny & OPEN4_SHARE_DENY_READ)
9453                 shr.s_deny |= F_RDDNY;
9454         if (deny & OPEN4_SHARE_DENY_WRITE)
9455                 shr.s_deny |= F_WRDNY;
9456 
9457         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9458         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9459         shr_loco.sl_pid = shr.s_pid;
9460         shr_loco.sl_id = shr.s_sysid;
9461         shr.s_owner = (caddr_t)&shr_loco;
9462         shr.s_own_len = sizeof (shr_loco);
9463 
9464         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9465 
9466         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9467         if (err != 0) {
9468                 if (err == EAGAIN)
9469                         err = NFS4ERR_SHARE_DENIED;
9470                 else
9471                         err = puterrno4(err);
9472                 return (err);
9473         }
9474 
9475         sp->rs_share_access |= access;
9476         sp->rs_share_deny |= deny;
9477 
9478         return (0);
9479 }
9480 
9481 int
9482 rfs4_unshare(rfs4_state_t *sp)
9483 {
9484         int err;
9485         struct shrlock shr;
9486         struct shr_locowner shr_loco;
9487 
9488         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9489 
9490         if (sp->rs_closed || sp->rs_share_access == 0)
9491                 return (0);
9492 
9493         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9494         ASSERT(sp->rs_finfo->rf_vp);
9495 
9496         shr.s_access = shr.s_deny = 0;
9497         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9498         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9499         shr_loco.sl_pid = shr.s_pid;
9500         shr_loco.sl_id = shr.s_sysid;
9501         shr.s_owner = (caddr_t)&shr_loco;
9502         shr.s_own_len = sizeof (shr_loco);
9503 
9504         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9505             NULL);
9506         if (err != 0) {
9507                 err = puterrno4(err);
9508                 return (err);
9509         }
9510 
9511         sp->rs_share_access = 0;
9512         sp->rs_share_deny = 0;
9513 
9514         return (0);
9515 
9516 }
9517 
9518 static int
9519 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9520 {
9521         struct clist    *wcl;
9522         count4          count = rok->data_len;
9523         int             wlist_len;
9524 
9525         wcl = args->wlist;
9526         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9527                 return (FALSE);
9528         }
9529         wcl = args->wlist;
9530         rok->wlist_len = wlist_len;
9531         rok->wlist = wcl;
9532         return (TRUE);
9533 }
9534 
/*
 * Tunable to disable server referrals.  When set to a non-zero value,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9537 
9538 /*
9539  * Find an NFS record in reparse point data.
9540  * Returns 0 for success and <0 or an errno value on failure.
9541  */
9542 int
9543 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9544 {
9545         int err;
9546         char *stype, *val;
9547         nvlist_t *nvl;
9548         nvpair_t *curr;
9549 
9550         if ((nvl = reparse_init()) == NULL)
9551                 return (-1);
9552 
9553         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9554                 reparse_free(nvl);
9555                 return (err);
9556         }
9557 
9558         curr = NULL;
9559         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9560                 if ((stype = nvpair_name(curr)) == NULL) {
9561                         reparse_free(nvl);
9562                         return (-2);
9563                 }
9564                 if (strncasecmp(stype, "NFS", 3) == 0)
9565                         break;
9566         }
9567 
9568         if ((curr == NULL) ||
9569             (nvpair_value_string(curr, &val))) {
9570                 reparse_free(nvl);
9571                 return (-3);
9572         }
9573         *nvlp = nvl;
9574         *svcp = stype;
9575         *datap = val;
9576         return (0);
9577 }
9578 
9579 int
9580 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9581 {
9582         nvlist_t *nvl;
9583         char *s, *d;
9584 
9585         if (rfs4_no_referrals != 0)
9586                 return (B_FALSE);
9587 
9588         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9589                 return (B_FALSE);
9590 
9591         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9592                 return (B_FALSE);
9593 
9594         reparse_free(nvl);
9595 
9596         return (B_TRUE);
9597 }
9598 
9599 /*
9600  * There is a user-level copy of this routine in ref_subr.c.
9601  * Changes should be kept in sync.
9602  */
9603 static int
9604 nfs4_create_components(char *path, component4 *comp4)
9605 {
9606         int slen, plen, ncomp;
9607         char *ori_path, *nxtc, buf[MAXNAMELEN];
9608 
9609         if (path == NULL)
9610                 return (0);
9611 
9612         plen = strlen(path) + 1;        /* include the terminator */
9613         ori_path = path;
9614         ncomp = 0;
9615 
9616         /* count number of components in the path */
9617         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9618                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9619                         if ((slen = nxtc - path) == 0) {
9620                                 path = nxtc + 1;
9621                                 continue;
9622                         }
9623 
9624                         if (comp4 != NULL) {
9625                                 bcopy(path, buf, slen);
9626                                 buf[slen] = '\0';
9627                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9628                         }
9629 
9630                         ncomp++;        /* 1 valid component */
9631                         path = nxtc + 1;
9632                 }
9633                 if (*nxtc == '\0' || *nxtc == '\n')
9634                         break;
9635         }
9636 
9637         return (ncomp);
9638 }
9639 
9640 /*
9641  * There is a user-level copy of this routine in ref_subr.c.
9642  * Changes should be kept in sync.
9643  */
9644 static int
9645 make_pathname4(char *path, pathname4 *pathname)
9646 {
9647         int ncomp;
9648         component4 *comp4;
9649 
9650         if (pathname == NULL)
9651                 return (0);
9652 
9653         if (path == NULL) {
9654                 pathname->pathname4_val = NULL;
9655                 pathname->pathname4_len = 0;
9656                 return (0);
9657         }
9658 
9659         /* count number of components to alloc buffer */
9660         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9661                 pathname->pathname4_val = NULL;
9662                 pathname->pathname4_len = 0;
9663                 return (0);
9664         }
9665         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9666 
9667         /* copy components into allocated buffer */
9668         ncomp = nfs4_create_components(path, comp4);
9669 
9670         pathname->pathname4_val = comp4;
9671         pathname->pathname4_len = ncomp;
9672 
9673         return (ncomp);
9674 }
9675 
/*
 * fetch_referral() decodes its fs_locations4 result through the name
 * xdr_fs_locations4; map that name onto xdr_fattr4_fs_locations.
 */
#define xdr_fs_locations4 xdr_fattr4_fs_locations
9677 
9678 fs_locations4 *
9679 fetch_referral(vnode_t *vp, cred_t *cr)
9680 {
9681         nvlist_t *nvl;
9682         char *stype, *sdata;
9683         fs_locations4 *result;
9684         char buf[1024];
9685         size_t bufsize;
9686         XDR xdr;
9687         int err;
9688 
9689         /*
9690          * Check attrs to ensure it's a reparse point
9691          */
9692         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9693                 return (NULL);
9694 
9695         /*
9696          * Look for an NFS record and get the type and data
9697          */
9698         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9699                 return (NULL);
9700 
9701         /*
9702          * With the type and data, upcall to get the referral
9703          */
9704         bufsize = sizeof (buf);
9705         bzero(buf, sizeof (buf));
9706         err = reparse_kderef((const char *)stype, (const char *)sdata,
9707             buf, &bufsize);
9708         reparse_free(nvl);
9709 
9710         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9711             char *, stype, char *, sdata, char *, buf, int, err);
9712         if (err) {
9713                 cmn_err(CE_NOTE,
9714                     "reparsed daemon not running: unable to get referral (%d)",
9715                     err);
9716                 return (NULL);
9717         }
9718 
9719         /*
9720          * We get an XDR'ed record back from the kderef call
9721          */
9722         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9723         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9724         err = xdr_fs_locations4(&xdr, result);
9725         XDR_DESTROY(&xdr);
9726         if (err != TRUE) {
9727                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9728                     int, err);
9729                 return (NULL);
9730         }
9731 
9732         /*
9733          * Look at path to recover fs_root, ignoring the leading '/'
9734          */
9735         (void) make_pathname4(vp->v_path, &result->fs_root);
9736 
9737         return (result);
9738 }
9739 
9740 char *
9741 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9742 {
9743         fs_locations4 *fsl;
9744         fs_location4 *fs;
9745         char *server, *path, *symbuf;
9746         static char *prefix = "/net/";
9747         int i, size, npaths;
9748         uint_t len;
9749 
9750         /* Get the referral */
9751         if ((fsl = fetch_referral(vp, cr)) == NULL)
9752                 return (NULL);
9753 
9754         /* Deal with only the first location and first server */
9755         fs = &fsl->locations_val[0];
9756         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9757         if (server == NULL) {
9758                 rfs4_free_fs_locations4(fsl);
9759                 kmem_free(fsl, sizeof (fs_locations4));
9760                 return (NULL);
9761         }
9762 
9763         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9764         size = strlen(prefix) + len;
9765         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9766                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9767 
9768         /* Allocate the symlink buffer and fill it */
9769         symbuf = kmem_zalloc(size, KM_SLEEP);
9770         (void) strcat(symbuf, prefix);
9771         (void) strcat(symbuf, server);
9772         kmem_free(server, len);
9773 
9774         npaths = 0;
9775         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9776                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9777                 if (path == NULL)
9778                         continue;
9779                 (void) strcat(symbuf, "/");
9780                 (void) strcat(symbuf, path);
9781                 npaths++;
9782                 kmem_free(path, len);
9783         }
9784 
9785         rfs4_free_fs_locations4(fsl);
9786         kmem_free(fsl, sizeof (fs_locations4));
9787 
9788         if (strsz != NULL)
9789                 *strsz = size;
9790         return (symbuf);
9791 }
9792 
9793 /*
9794  * Check to see if we have a downrev Solaris client, so that we
9795  * can send it a symlink instead of a referral.
9796  */
9797 int
9798 client_is_downrev(struct svc_req *req)
9799 {
9800         struct sockaddr *ca;
9801         rfs4_clntip_t *ci;
9802         bool_t create = FALSE;
9803         int is_downrev;
9804 
9805         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9806         ASSERT(ca);
9807         ci = rfs4_find_clntip(ca, &create);
9808         if (ci == NULL)
9809                 return (0);
9810         is_downrev = ci->ri_no_referrals;
9811         rfs4_dbe_rele(ci->ri_dbe);
9812         return (is_downrev);
9813 }