1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2018 Nexenta Systems, Inc.
  24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 
  28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /* All Rights Reserved */
  30 
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/buf.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/uio.h>
  40 #include <sys/errno.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/statvfs.h>
  43 #include <sys/kmem.h>
  44 #include <sys/dirent.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/systeminfo.h>
  48 #include <sys/flock.h>
  49 #include <sys/nbmlock.h>
  50 #include <sys/policy.h>
  51 #include <sys/sdt.h>
  52 
  53 #include <rpc/types.h>
  54 #include <rpc/auth.h>
  55 #include <rpc/svc.h>
  56 #include <rpc/rpc_rdma.h>
  57 
  58 #include <nfs/nfs.h>
  59 #include <nfs/export.h>
  60 #include <nfs/nfs_cmd.h>
  61 
  62 #include <sys/strsubr.h>
  63 #include <sys/tsol/label.h>
  64 #include <sys/tsol/tndb.h>
  65 
  66 #include <sys/zone.h>
  67 
  68 #include <inet/ip.h>
  69 #include <inet/ip6.h>
  70 
  71 /*
  72  * Zone global variables of NFSv3 server
  73  */
  74 typedef struct nfs3_srv {
  75         writeverf3      write3verf;
  76 } nfs3_srv_t;
  77 
  78 /*
  79  * These are the interface routines for the server side of the
  80  * Network File System.  See the NFS version 3 protocol specification
  81  * for a description of this interface.
  82  */
  83 
/*
 * Forward declarations of file-local helpers.  Their definitions are not
 * in this part of the file; the signatures show they translate between
 * struct vattr and the NFSv3 attribute types (sattr3, fattr3, wcc_attr,
 * pre_op_attr, wcc_data).
 */
static int      sattr3_to_vattr(sattr3 *, struct vattr *);
static int      vattr_to_fattr3(struct vattr *, fattr3 *);
static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
static int      rdma_setup_read_data3(READ3args *, READ3resok *);

/* Defined elsewhere; not referenced in this part of the file. */
extern int nfs_loaned_buffers;

/* Copied into caller_context_t.cc_caller_id by rfs3_setattr(). */
u_longlong_t nfs3_srv_caller_id;
/* NOTE(review): presumably the zone key for the per-zone nfs3_srv_t -- confirm. */
static zone_key_t rfs3_zone_key;
  95 
  96 /* ARGSUSED */
  97 void
  98 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  99     struct svc_req *req, cred_t *cr, bool_t ro)
 100 {
 101         int error;
 102         vnode_t *vp;
 103         struct vattr va;
 104 
 105         vp = nfs3_fhtovp(&args->object, exi);
 106 
 107         DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
 108             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 109             GETATTR3args *, args);
 110 
 111         if (vp == NULL) {
 112                 error = ESTALE;
 113                 goto out;
 114         }
 115 
 116         va.va_mask = AT_ALL;
 117         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 118 
 119         if (!error) {
 120                 /* Lie about the object type for a referral */
 121                 if (vn_is_nfs_reparse(vp, cr))
 122                         va.va_type = VLNK;
 123 
 124                 /* overflow error if time or size is out of range */
 125                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 126                 if (error)
 127                         goto out;
 128                 resp->status = NFS3_OK;
 129 
 130                 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 131                     cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 132                     GETATTR3res *, resp);
 133 
 134                 VN_RELE(vp);
 135 
 136                 return;
 137         }
 138 
 139 out:
 140         if (curthread->t_flag & T_WOULDBLOCK) {
 141                 curthread->t_flag &= ~T_WOULDBLOCK;
 142                 resp->status = NFS3ERR_JUKEBOX;
 143         } else
 144                 resp->status = puterrno3(error);
 145 
 146         DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 147             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 148             GETATTR3res *, resp);
 149 
 150         if (vp != NULL)
 151                 VN_RELE(vp);
 152 }
 153 
 154 void *
 155 rfs3_getattr_getfh(GETATTR3args *args)
 156 {
 157 
 158         return (&args->object);
 159 }
 160 
/*
 * NFSv3 SETATTR: apply args->new_attributes to the object named by
 * args->object.
 *
 * The request is refused with NFS3ERR_ROFS when rdonly() reports a
 * read-only export, with NFS3ERR_NOT_SYNC when the optional ctime guard
 * does not match the current ctime, and with NFS3ERR_ACCES when the label
 * check on a labeled system fails.  A size change requested by the file's
 * owner on a regular file is carried out with VOP_SPACE(); every other
 * attribute goes through VOP_SETATTR().
 *
 * On success resp->status is NFS3_OK and resp->resok.obj_wcc holds the
 * before/after attributes; on failure resp->resfail.obj_wcc is filled in
 * from whatever before/after attributes were obtained.
 */
void
rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        int error;
        vnode_t *vp;
        struct vattr *bvap;     /* "before" attributes, NULL until fetched */
        struct vattr bva;
        struct vattr *avap;     /* "after" attributes, NULL until fetched */
        struct vattr ava;
        int flag;
        int in_crit = 0;        /* set once nbl_start_crit() has been called */
        struct flock64 bf;
        caller_context_t ct;

        bvap = NULL;
        avap = NULL;

        vp = nfs3_fhtovp(&args->object, exi);

        DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            SETATTR3args *, args);

        if (vp == NULL) {
                error = ESTALE;
                goto out;
        }

        /* Until the post-operation getattr, ava holds the requested values. */
        error = sattr3_to_vattr(&args->new_attributes, &ava);
        if (error)
                goto out;

        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
                            exi)) {
                                resp->status = NFS3ERR_ACCES;
                                goto out1;
                        }
                }
        }

        /*
         * We need to specially handle size changes because of
         * possible conflicting NBMAND locks. Get into critical
         * region before VOP_GETATTR, so the size attribute is
         * valid when checking conflicts.
         *
         * Also, check to see if the v4 side of the server has
         * delegated this file.  If so, then we return JUKEBOX to
         * allow the client to retransmit its request.
         */
        if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
                if (nbl_need_check(vp)) {
                        nbl_start_crit(vp, RW_READER);
                        in_crit = 1;
                }
        }

        bva.va_mask = AT_ALL;
        error = rfs4_delegated_getattr(vp, &bva, 0, cr);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto out;

        bvap = &bva;

        if (rdonly(ro, vp)) {
                resp->status = NFS3ERR_ROFS;
                goto out1;
        }

        /* Optional guard: the client's idea of ctime must match ours. */
        if (args->guard.check &&
            (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
            args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
                resp->status = NFS3ERR_NOT_SYNC;
                goto out1;
        }

        if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
                flag = ATTR_UTIME;
        else
                flag = 0;

        /*
         * If the filesystem is exported with nosuid, then mask off
         * the setuid and setgid bits.
         */
        if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
            (exi->exi_export.ex_flags & EX_NOSUID))
                ava.va_mode &= ~(VSUID | VSGID);

        /*
         * CC_DONTBLOCK is requested here; a conflict is recognised below
         * as EAGAIN with CC_WOULDBLOCK set in ct.cc_flags.
         */
        ct.cc_sysid = 0;
        ct.cc_pid = 0;
        ct.cc_caller_id = nfs3_srv_caller_id;
        ct.cc_flags = CC_DONTBLOCK;

        /*
         * We need to specially handle size changes because it is
         * possible for the client to create a file with modes
         * which indicate read-only, but with the file opened for
         * writing.  If the client then tries to set the size of
         * the file, then the normal access checking done in
         * VOP_SETATTR would prevent the client from doing so,
         * although it should be legal for it to do so.  To get
         * around this, we do the access checking for ourselves
         * and then use VOP_SPACE which doesn't do the access
         * checking which VOP_SETATTR does. VOP_SPACE can only
         * operate on VREG files, let VOP_SETATTR handle the other
         * extremely rare cases.
         * Also the client should not be allowed to change the
         * size of the file if there is a conflicting non-blocking
         * mandatory lock in the region of the change.
         */
        if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
                if (in_crit) {
                        u_offset_t offset;
                        ssize_t length;

                        /* The affected region lies between old and new size. */
                        if (ava.va_size < bva.va_size) {
                                offset = ava.va_size;
                                length = bva.va_size - ava.va_size;
                        } else {
                                offset = bva.va_size;
                                length = ava.va_size - bva.va_size;
                        }
                        if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
                            NULL)) {
                                error = EACCES;
                                goto out;
                        }
                }

                /*
                 * Only the owner takes the VOP_SPACE path; AT_SIZE is
                 * cleared so VOP_SETATTR below does not repeat the change.
                 */
                if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
                        ava.va_mask &= ~AT_SIZE;
                        bf.l_type = F_WRLCK;
                        bf.l_whence = 0;
                        bf.l_start = (off64_t)ava.va_size;
                        bf.l_len = 0;
                        bf.l_sysid = 0;
                        bf.l_pid = 0;
                        error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
                            (offset_t)ava.va_size, cr, &ct);
                }
        }

        /* Apply whatever attributes remain in the mask. */
        if (!error && ava.va_mask)
                error = VOP_SETATTR(vp, &ava, flag, cr, &ct);

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                goto out1;
        }

        /* ava is reused from here on for the post-operation attributes. */
        ava.va_mask = AT_ALL;
        avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;

        /*
         * Force modified metadata out to stable storage.
         */
        (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

        if (error)
                goto out;

        if (in_crit)
                nbl_end_crit(vp);

        resp->status = NFS3_OK;
        vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);

        DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            SETATTR3res *, resp);

        VN_RELE(vp);

        return;

        /* Error exit: derive resp->status from error or T_WOULDBLOCK. */
out:
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
        /* Exit for paths that have already set resp->status themselves. */
out1:
        DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            SETATTR3res *, resp);

        if (vp != NULL) {
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
        vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
}
 370 
 371 void *
 372 rfs3_setattr_getfh(SETATTR3args *args)
 373 {
 374 
 375         return (&args->object);
 376 }
 377 
 378 /* ARGSUSED */
 379 void
 380 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 381     struct svc_req *req, cred_t *cr, bool_t ro)
 382 {
 383         int error;
 384         vnode_t *vp;
 385         vnode_t *dvp;
 386         struct vattr *vap;
 387         struct vattr va;
 388         struct vattr *dvap;
 389         struct vattr dva;
 390         nfs_fh3 *fhp;
 391         struct sec_ol sec = {0, 0};
 392         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 393         struct sockaddr *ca;
 394         char *name = NULL;
 395 
 396         dvap = NULL;
 397 
 398         if (exi != NULL)
 399                 exi_hold(exi);
 400 
 401         /*
 402          * Allow lookups from the root - the default
 403          * location of the public filehandle.
 404          */
 405         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 406                 dvp = ZONE_ROOTVP();
 407                 VN_HOLD(dvp);
 408 
 409                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 410                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 411                     LOOKUP3args *, args);
 412         } else {
 413                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 414 
 415                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 416                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 417                     LOOKUP3args *, args);
 418 
 419                 if (dvp == NULL) {
 420                         error = ESTALE;
 421                         goto out;
 422                 }
 423         }
 424 
 425         dva.va_mask = AT_ALL;
 426         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 427 
 428         if (args->what.name == nfs3nametoolong) {
 429                 resp->status = NFS3ERR_NAMETOOLONG;
 430                 goto out1;
 431         }
 432 
 433         if (args->what.name == NULL || *(args->what.name) == '\0') {
 434                 resp->status = NFS3ERR_ACCES;
 435                 goto out1;
 436         }
 437 
 438         fhp = &args->what.dir;
 439         ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
 440         if (strcmp(args->what.name, "..") == 0 &&
 441             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 442                 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 443                     ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
 444                         /*
 445                          * special case for ".." and 'nohide'exported root
 446                          */
 447                         if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 448                                 resp->status = NFS3ERR_ACCES;
 449                                 goto out1;
 450                         }
 451                 } else {
 452                         resp->status = NFS3ERR_NOENT;
 453                         goto out1;
 454                 }
 455         }
 456 
 457         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 458         name = nfscmd_convname(ca, exi, args->what.name,
 459             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 460 
 461         if (name == NULL) {
 462                 resp->status = NFS3ERR_ACCES;
 463                 goto out1;
 464         }
 465 
 466         /*
 467          * If the public filehandle is used then allow
 468          * a multi-component lookup
 469          */
 470         if (PUBLIC_FH3(&args->what.dir)) {
 471                 publicfh_flag = TRUE;
 472 
 473                 exi_rele(exi);
 474 
 475                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 476                     &exi, &sec);
 477 
 478                 /*
 479                  * Since WebNFS may bypass MOUNT, we need to ensure this
 480                  * request didn't come from an unlabeled admin_low client.
 481                  */
 482                 if (is_system_labeled() && error == 0) {
 483                         int             addr_type;
 484                         void            *ipaddr;
 485                         tsol_tpc_t      *tp;
 486 
 487                         if (ca->sa_family == AF_INET) {
 488                                 addr_type = IPV4_VERSION;
 489                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 490                         } else if (ca->sa_family == AF_INET6) {
 491                                 addr_type = IPV6_VERSION;
 492                                 ipaddr = &((struct sockaddr_in6 *)
 493                                     ca)->sin6_addr;
 494                         }
 495                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 496                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 497                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 498                             SUN_CIPSO) {
 499                                 VN_RELE(vp);
 500                                 error = EACCES;
 501                         }
 502                         if (tp != NULL)
 503                                 TPC_RELE(tp);
 504                 }
 505         } else {
 506                 error = VOP_LOOKUP(dvp, name, &vp,
 507                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 508         }
 509 
 510         if (name != args->what.name)
 511                 kmem_free(name, MAXPATHLEN + 1);
 512 
 513         if (error == 0 && vn_ismntpt(vp)) {
 514                 error = rfs_cross_mnt(&vp, &exi);
 515                 if (error)
 516                         VN_RELE(vp);
 517         }
 518 
 519         if (is_system_labeled() && error == 0) {
 520                 bslabel_t *clabel = req->rq_label;
 521 
 522                 ASSERT(clabel != NULL);
 523                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 524                     "got client label from request(1)", struct svc_req *, req);
 525 
 526                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 527                         if (!do_rfs_label_check(clabel, dvp,
 528                             DOMINANCE_CHECK, exi)) {
 529                                 VN_RELE(vp);
 530                                 error = EACCES;
 531                         }
 532                 }
 533         }
 534 
 535         dva.va_mask = AT_ALL;
 536         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 537 
 538         if (error)
 539                 goto out;
 540 
 541         if (sec.sec_flags & SEC_QUERY) {
 542                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 543         } else {
 544                 error = makefh3(&resp->resok.object, vp, exi);
 545                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 546                         auth_weak = TRUE;
 547         }
 548 
 549         if (error) {
 550                 VN_RELE(vp);
 551                 goto out;
 552         }
 553 
 554         va.va_mask = AT_ALL;
 555         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 556 
 557         exi_rele(exi);
 558         VN_RELE(vp);
 559 
 560         resp->status = NFS3_OK;
 561         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 562         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 563 
 564         /*
 565          * If it's public fh, no 0x81, and client's flavor is
 566          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 567          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 568          */
 569         if (auth_weak)
 570                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 571 
 572         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 573             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 574             LOOKUP3res *, resp);
 575         VN_RELE(dvp);
 576 
 577         return;
 578 
 579 out:
 580         if (curthread->t_flag & T_WOULDBLOCK) {
 581                 curthread->t_flag &= ~T_WOULDBLOCK;
 582                 resp->status = NFS3ERR_JUKEBOX;
 583         } else
 584                 resp->status = puterrno3(error);
 585 out1:
 586         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 587             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 588             LOOKUP3res *, resp);
 589 
 590         if (exi != NULL)
 591                 exi_rele(exi);
 592 
 593         if (dvp != NULL)
 594                 VN_RELE(dvp);
 595         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 596 
 597 }
 598 
 599 void *
 600 rfs3_lookup_getfh(LOOKUP3args *args)
 601 {
 602 
 603         return (&args->what.dir);
 604 }
 605 
/*
 * NFSv3 ACCESS: report which of the access bits requested in args->access
 * the caller is granted on the object named by args->object.
 *
 * Each requested bit is tested with VOP_ACCESS().  A failed check simply
 * leaves the bit clear in resp->resok.access; the call as a whole fails
 * only when the file handle is stale, the initial VOP_GETATTR() fails, or
 * the thread is flagged T_WOULDBLOCK (reported as NFS3ERR_JUKEBOX).  Write
 * style bits are not checked for regular files and directories when
 * rdonly() reports a read-only export.
 *
 * On labeled systems, read/lookup/execute additionally require that the
 * client label dominate the object (or that the client be admin_low), and
 * modify/extend/delete require label equality.
 */
/* ARGSUSED */
void
rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        int error;
        vnode_t *vp;
        struct vattr *vap;
        struct vattr va;
        int checkwriteperm;
        boolean_t dominant_label = B_FALSE;
        boolean_t equal_label = B_FALSE;
        /*
         * Assigned only when is_system_labeled() is true; every read below
         * is guarded by a preceding "!is_system_labeled() ||" test.
         */
        boolean_t admin_low_client;

        vap = NULL;

        vp = nfs3_fhtovp(&args->object, exi);

        DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            ACCESS3args *, args);

        if (vp == NULL) {
                error = ESTALE;
                goto out;
        }

        /*
         * If the file system is exported read only, it is not appropriate
         * to check write permissions for regular files and directories.
         * Special files are interpreted by the client, so the underlying
         * permissions are sent back to the client for interpretation.
         */
        if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
                checkwriteperm = 0;
        else
                checkwriteperm = 1;

        /*
         * We need the mode so that we can correctly determine access
         * permissions relative to a mandatory lock file.  Access to
         * mandatory lock files is denied on the server, so it might
         * as well be reflected to the client during the open.
         */
        va.va_mask = AT_MODE;
        error = VOP_GETATTR(vp, &va, 0, cr, NULL);
        if (error)
                goto out;

        vap = &va;

        resp->resok.access = 0;

        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        /* An equal label is also treated as dominant. */
                        if ((equal_label = do_rfs_label_check(clabel, vp,
                            EQUALITY_CHECK, exi)) == B_FALSE) {
                                dominant_label = do_rfs_label_check(clabel,
                                    vp, DOMINANCE_CHECK, exi);
                        } else
                                dominant_label = B_TRUE;
                        admin_low_client = B_FALSE;
                } else
                        admin_low_client = B_TRUE;
        }

        /*
         * For each requested bit: a VOP_ACCESS() failure aborts the call
         * only if the thread would block; otherwise the bit stays clear.
         */
        if (args->access & ACCESS3_READ) {
                error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!MANDLOCK(vp, va.va_mode) &&
                    (!is_system_labeled() || admin_low_client ||
                    dominant_label))
                        resp->resok.access |= ACCESS3_READ;
        }
        if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
                error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!is_system_labeled() || admin_low_client ||
                    dominant_label)
                        resp->resok.access |= ACCESS3_LOOKUP;
        }
        if (checkwriteperm &&
            (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
                error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!MANDLOCK(vp, va.va_mode) &&
                    (!is_system_labeled() || admin_low_client || equal_label)) {
                        /* Grant whichever of MODIFY/EXTEND was asked for. */
                        resp->resok.access |=
                            (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
                }
        }
        if (checkwriteperm &&
            (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
                error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!is_system_labeled() || admin_low_client ||
                    equal_label)
                        resp->resok.access |= ACCESS3_DELETE;
        }
        if (args->access & ACCESS3_EXECUTE) {
                error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!MANDLOCK(vp, va.va_mode) &&
                    (!is_system_labeled() || admin_low_client ||
                    dominant_label))
                        resp->resok.access |= ACCESS3_EXECUTE;
        }

        /* Fetch the full attribute set for the post-op attributes. */
        va.va_mask = AT_ALL;
        vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;

        resp->status = NFS3_OK;
        vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);

        DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            ACCESS3res *, resp);

        VN_RELE(vp);

        return;

        /* Error exit: derive resp->status from error or T_WOULDBLOCK. */
out:
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
        DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            ACCESS3res *, resp);
        if (vp != NULL)
                VN_RELE(vp);
        vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
}
 757 
 758 void *
 759 rfs3_access_getfh(ACCESS3args *args)
 760 {
 761 
 762         return (&args->object);
 763 }
 764 
 765 /* ARGSUSED */
 766 void
 767 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 768     struct svc_req *req, cred_t *cr, bool_t ro)
 769 {
 770         int error;
 771         vnode_t *vp;
 772         struct vattr *vap;
 773         struct vattr va;
 774         struct iovec iov;
 775         struct uio uio;
 776         char *data;
 777         struct sockaddr *ca;
 778         char *name = NULL;
 779         int is_referral = 0;
 780 
 781         vap = NULL;
 782 
 783         vp = nfs3_fhtovp(&args->symlink, exi);
 784 
 785         DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
 786             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 787             READLINK3args *, args);
 788 
 789         if (vp == NULL) {
 790                 error = ESTALE;
 791                 goto out;
 792         }
 793 
 794         va.va_mask = AT_ALL;
 795         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 796         if (error)
 797                 goto out;
 798 
 799         vap = &va;
 800 
 801         /* We lied about the object type for a referral */
 802         if (vn_is_nfs_reparse(vp, cr))
 803                 is_referral = 1;
 804 
 805         if (vp->v_type != VLNK && !is_referral) {
 806                 resp->status = NFS3ERR_INVAL;
 807                 goto out1;
 808         }
 809 
 810         if (MANDLOCK(vp, va.va_mode)) {
 811                 resp->status = NFS3ERR_ACCES;
 812                 goto out1;
 813         }
 814 
 815         if (is_system_labeled()) {
 816                 bslabel_t *clabel = req->rq_label;
 817 
 818                 ASSERT(clabel != NULL);
 819                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 820                     "got client label from request(1)", struct svc_req *, req);
 821 
 822                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 823                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 824                             exi)) {
 825                                 resp->status = NFS3ERR_ACCES;
 826                                 goto out1;
 827                         }
 828                 }
 829         }
 830 
 831         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 832 
 833         if (is_referral) {
 834                 char *s;
 835                 size_t strsz;
 836 
 837                 /* Get an artificial symlink based on a referral */
 838                 s = build_symlink(vp, cr, &strsz);
 839                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 840                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 841                     vnode_t *, vp, char *, s);
 842                 if (s == NULL)
 843                         error = EINVAL;
 844                 else {
 845                         error = 0;
 846                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 847                         kmem_free(s, strsz);
 848                 }
 849 
 850         } else {
 851 
 852                 iov.iov_base = data;
 853                 iov.iov_len = MAXPATHLEN;
 854                 uio.uio_iov = &iov;
 855                 uio.uio_iovcnt = 1;
 856                 uio.uio_segflg = UIO_SYSSPACE;
 857                 uio.uio_extflg = UIO_COPY_CACHED;
 858                 uio.uio_loffset = 0;
 859                 uio.uio_resid = MAXPATHLEN;
 860 
 861                 error = VOP_READLINK(vp, &uio, cr, NULL);
 862 
 863                 if (!error)
 864                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 865         }
 866 
 867         va.va_mask = AT_ALL;
 868         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 869 
 870         /* Lie about object type again just to be consistent */
 871         if (is_referral && vap != NULL)
 872                 vap->va_type = VLNK;
 873 
 874 #if 0 /* notyet */
 875         /*
 876          * Don't do this.  It causes local disk writes when just
 877          * reading the file and the overhead is deemed larger
 878          * than the benefit.
 879          */
 880         /*
 881          * Force modified metadata out to stable storage.
 882          */
 883         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 884 #endif
 885 
 886         if (error) {
 887                 kmem_free(data, MAXPATHLEN + 1);
 888                 goto out;
 889         }
 890 
 891         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 892         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 893             MAXPATHLEN + 1);
 894 
 895         if (name == NULL) {
 896                 /*
 897                  * Even though the conversion failed, we return
 898                  * something. We just don't translate it.
 899                  */
 900                 name = data;
 901         }
 902 
 903         resp->status = NFS3_OK;
 904         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 905         resp->resok.data = name;
 906 
 907         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 908             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 909             READLINK3res *, resp);
 910         VN_RELE(vp);
 911 
 912         if (name != data)
 913                 kmem_free(data, MAXPATHLEN + 1);
 914 
 915         return;
 916 
 917 out:
 918         if (curthread->t_flag & T_WOULDBLOCK) {
 919                 curthread->t_flag &= ~T_WOULDBLOCK;
 920                 resp->status = NFS3ERR_JUKEBOX;
 921         } else
 922                 resp->status = puterrno3(error);
 923 out1:
 924         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 925             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 926             READLINK3res *, resp);
 927         if (vp != NULL)
 928                 VN_RELE(vp);
 929         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 930 }
 931 
     /*
      * Return a pointer to the file handle carried in the READLINK3
      * arguments (the "symlink" member of *args).
      */
 932 void *
 933 rfs3_readlink_getfh(READLINK3args *args)
 934 {
 935         void *fhp = &args->symlink;
 936         return (fhp);
 937 }
 938 
     /*
      * Release the link text attached to a READLINK3 reply.  Only a reply
      * whose status is NFS3_OK carries a buffer in resok.data; it is freed
      * here as a MAXPATHLEN + 1 byte allocation.  Any other status leaves
      * nothing to free.
      */
 939 void
 940 rfs3_readlink_free(READLINK3res *resp)
 941 {
 942         if (resp->status != NFS3_OK)
 943                 return;
 944         kmem_free(resp->resok.data, MAXPATHLEN + 1);
 945 }
 946 
 947 /*
 948  * Server routine to handle read
 949  * May handle RDMA data as well as mblks
      *
      * Reads up to args->count bytes at args->offset from the regular file
      * named by args->file and fills in *resp.  The reply data ends up in
      * one of three places:
      *   - the client's RDMA write chunk list, when args->wlist is set;
      *   - buffers obtained through VOP_REQZCBUF(), when nfs_loaned_buffers
      *     is set and RDMA is not in use;
      *   - an mblk chain from rfs_read_alloc() otherwise (also the fallback
      *     when VOP_REQZCBUF() fails).
      *
      *   args - READ3 arguments (file handle, offset, count, optional wlist);
      *          args->count is clamped in place to rfs3_tsize(req)
      *   resp - reply; resok is filled in on NFS3_OK, resfail otherwise
      *   exi  - export information, passed to nfs3_fhtovp() and the label
      *          check
      *   req  - RPC request; supplies the client label and transfer size
      *   cr   - credentials used for the attribute, access and read calls
      *   ro   - not referenced by this routine
      *
      * Exit paths:
      *   done - success; the rwlock and the nbmand critical region have
      *          already been dropped by the code that jumps or falls here.
      *   out  - failure with an errno in "error"; it is converted with
      *          puterrno3() unless the thread is flagged T_WOULDBLOCK, in
      *          which case NFS3ERR_JUKEBOX is returned.
      *   out1 - failure with resp->status already set; drops the rwlock
      *          (if need_rwunlock), leaves the critical region (if in_crit)
      *          and releases the vnode.
 950  */
 951 /* ARGSUSED */
 952 void
 953 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 954     struct svc_req *req, cred_t *cr, bool_t ro)
 955 {
 956         int error;
 957         vnode_t *vp;
 958         struct vattr *vap;
 959         struct vattr va;
 960         struct iovec iov, *iovp = NULL;
 961         int iovcnt;
 962         struct uio uio;
 963         u_offset_t offset;
 964         mblk_t *mp = NULL;
 965         int in_crit = 0;
 966         int need_rwunlock = 0;
 967         caller_context_t ct;
 968         int rdma_used = 0;
 969         int loaned_buffers;
 970         struct uio *uiop;
 971 
             /* No attributes to report until VOP_GETATTR() has succeeded. */
 972         vap = NULL;
 973 
 974         vp = nfs3_fhtovp(&args->file, exi);
 975 
 976         DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
 977             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 978             READ3args *, args);
 979 
 980 
 981         if (vp == NULL) {
 982                 error = ESTALE;
 983                 goto out;
 984         }
 985 
             /*
              * A write chunk list means the reply data goes out via RDMA;
              * the list must be able to hold the requested byte count.
              */
 986         if (args->wlist) {
 987                 if (args->count > clist_len(args->wlist)) {
 988                         error = EINVAL;
 989                         goto out;
 990                 }
 991                 rdma_used = 1;
 992         }
 993 
 994         /* use loaned buffers for TCP */
 995         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
 996 
 997         if (is_system_labeled()) {
 998                 bslabel_t *clabel = req->rq_label;
 999 
1000                 ASSERT(clabel != NULL);
1001                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1002                     "got client label from request(1)", struct svc_req *, req);
1003 
                     /*
                      * Clients whose label equals admin_low skip the check;
                      * everyone else must pass the dominance check.
                      */
1004                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1005                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1006                             exi)) {
1007                                 resp->status = NFS3ERR_ACCES;
1008                                 goto out1;
1009                         }
1010                 }
1011         }
1012 
             /* Caller context handed to every VOP call below. */
1013         ct.cc_sysid = 0;
1014         ct.cc_pid = 0;
1015         ct.cc_caller_id = nfs3_srv_caller_id;
1016         ct.cc_flags = CC_DONTBLOCK;
1017 
1018         /*
1019          * Enter the critical region before calling VOP_RWLOCK
1020          * to avoid a deadlock with write requests.
1021          */
1022         if (nbl_need_check(vp)) {
1023                 nbl_start_crit(vp, RW_READER);
1024                 in_crit = 1;
1025                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1026                     NULL)) {
1027                         error = EACCES;
1028                         goto out;
1029                 }
1030         }
1031 
1032         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1033 
1034         /* check if a monitor detected a delegation conflict */
1035         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1036                 resp->status = NFS3ERR_JUKEBOX;
1037                 goto out1;
1038         }
1039 
             /* From here on every failure path must call VOP_RWUNLOCK(). */
1040         need_rwunlock = 1;
1041 
1042         va.va_mask = AT_ALL;
1043         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1044 
1045         /*
1046          * If we can't get the attributes, then we can't do the
1047          * right access checking.  So, we'll fail the request.
1048          */
1049         if (error)
1050                 goto out;
1051 
1052         vap = &va;
1053 
1054         if (vp->v_type != VREG) {
1055                 resp->status = NFS3ERR_INVAL;
1056                 goto out1;
1057         }
1058 
             /*
              * The file's owner is not access-checked.  Anyone else needs
              * read permission or, failing that, execute permission.  A
              * T_WOULDBLOCK failure is reported without trying VEXEC.
              */
1059         if (crgetuid(cr) != va.va_uid) {
1060                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1061                 if (error) {
1062                         if (curthread->t_flag & T_WOULDBLOCK)
1063                                 goto out;
1064                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1065                         if (error)
1066                                 goto out;
1067                 }
1068         }
1069 
1070         if (MANDLOCK(vp, va.va_mode)) {
1071                 resp->status = NFS3ERR_ACCES;
1072                 goto out1;
1073         }
1074 
             /* Read at or beyond end of file: empty reply with eof set. */
1075         offset = args->offset;
1076         if (offset >= va.va_size) {
1077                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1078                 if (in_crit)
1079                         nbl_end_crit(vp);
1080                 resp->status = NFS3_OK;
1081                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1082                 resp->resok.count = 0;
1083                 resp->resok.eof = TRUE;
1084                 resp->resok.data.data_len = 0;
1085                 resp->resok.data.data_val = NULL;
1086                 resp->resok.data.mp = NULL;
1087                 /* RDMA */
1088                 resp->resok.wlist = args->wlist;
1089                 resp->resok.wlist_len = resp->resok.count;
1090                 if (resp->resok.wlist)
1091                         clist_zero_len(resp->resok.wlist);
1092                 goto done;
1093         }
1094 
             /* Zero-length read inside the file: empty reply, eof clear. */
1095         if (args->count == 0) {
1096                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1097                 if (in_crit)
1098                         nbl_end_crit(vp);
1099                 resp->status = NFS3_OK;
1100                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1101                 resp->resok.count = 0;
1102                 resp->resok.eof = FALSE;
1103                 resp->resok.data.data_len = 0;
1104                 resp->resok.data.data_val = NULL;
1105                 resp->resok.data.mp = NULL;
1106                 /* RDMA */
1107                 resp->resok.wlist = args->wlist;
1108                 resp->resok.wlist_len = resp->resok.count;
1109                 if (resp->resok.wlist)
1110                         clist_zero_len(resp->resok.wlist);
1111                 goto done;
1112         }
1113 
1114         /*
1115          * do not allocate more memory than the max. allowed
1116          * transfer size
1117          */
1118         if (args->count > rfs3_tsize(req))
1119                 args->count = rfs3_tsize(req);
1120 
             /*
              * Loaned-buffer attempt: set up an xuio and ask the file
              * system for buffers.  If that fails, free the xuio and fall
              * back to the RDMA/mblk setup below.
              */
1121         if (loaned_buffers) {
1122                 uiop = (uio_t *)rfs_setup_xuio(vp);
1123                 ASSERT(uiop != NULL);
1124                 uiop->uio_segflg = UIO_SYSSPACE;
1125                 uiop->uio_loffset = args->offset;
1126                 uiop->uio_resid = args->count;
1127 
1128                 /* Jump to do the read if successful */
1129                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1130                         /*
1131                          * Need to hold the vnode until after VOP_RETZCBUF()
1132                          * is called.
1133                          */
1134                         VN_HOLD(vp);
1135                         goto doio_read;
1136                 }
1137 
1138                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1139                     uiop->uio_loffset, int, uiop->uio_resid);
1140 
1141                 uiop->uio_extflg = 0;
1142                 /* failure to setup for zero copy */
1143                 rfs_free_xuio((void *)uiop);
1144                 loaned_buffers = 0;
1145         }
1146 
1147         /*
1148          * If returning data via RDMA Write, then grab the chunk list.
1149          * If we aren't returning READ data w/RDMA_WRITE, then grab
1150          * a mblk.
1151          */
1152         if (rdma_used) {
1153                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1154                 uio.uio_iov = &iov;
1155                 uio.uio_iovcnt = 1;
1156         } else {
1157                 /*
1158                  * mp will contain the data to be sent out in the read reply.
1159                  * For UDP, this will be freed after the reply has been sent
1160                  * out by the driver.  For TCP, it will be freed after the last
1161                  * segment associated with the reply has been ACKed by the
1162                  * client.
1163                  */
1164                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1165                 uio.uio_iov = iovp;
1166                 uio.uio_iovcnt = iovcnt;
1167         }
1168 
1169         uio.uio_segflg = UIO_SYSSPACE;
1170         uio.uio_extflg = UIO_COPY_CACHED;
1171         uio.uio_loffset = args->offset;
1172         uio.uio_resid = args->count;
1173         uiop = &uio;
1174 
             /*
              * uiop is either the xuio set up for loaned buffers or the
              * local uio built just above.
              */
1175 doio_read:
1176         error = VOP_READ(vp, uiop, 0, cr, &ct);
1177 
             /*
              * NOTE(review): when this is reached through the loaned-buffer
              * path, nothing visible here frees the xuio or drops the extra
              * VN_HOLD() taken above if VOP_READ() fails -- confirm that
              * cleanup happens elsewhere.
              */
1178         if (error) {
1179                 if (mp)
1180                         freemsg(mp);
1181                 /* check if a monitor detected a delegation conflict */
1182                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1183                         resp->status = NFS3ERR_JUKEBOX;
1184                         goto out1;
1185                 }
1186                 goto out;
1187         }
1188 
1189         /* make mblk using zc buffers */
1190         if (loaned_buffers) {
1191                 mp = uio_to_mblk(uiop);
1192                 ASSERT(mp != NULL);
1193         }
1194 
             /*
              * Re-fetch the attributes for the reply.  A failure here does
              * not fail the read; the reply simply carries no attributes
              * and eof is reported as FALSE.
              */
1195         va.va_mask = AT_ALL;
1196         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1197 
1198         if (error)
1199                 vap = NULL;
1200         else
1201                 vap = &va;
1202 
1203         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1204 
1205         if (in_crit)
1206                 nbl_end_crit(vp);
1207 
1208         resp->status = NFS3_OK;
1209         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
             /* Bytes actually read: requested count minus what is left. */
1210         resp->resok.count = args->count - uiop->uio_resid;
1211         if (!error && offset + resp->resok.count == va.va_size)
1212                 resp->resok.eof = TRUE;
1213         else
1214                 resp->resok.eof = FALSE;
1215         resp->resok.data.data_len = resp->resok.count;
1216 
1217         if (mp)
1218                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1219 
1220         resp->resok.data.mp = mp;
1221         resp->resok.size = (uint_t)args->count;
1222 
1223         if (rdma_used) {
1224                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1225                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1226                         resp->status = NFS3ERR_INVAL;
1227                 }
1228         } else {
1229                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1230                 (resp->resok).wlist = NULL;
1231         }
1232 
1233 done:
1234         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1235             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1236             READ3res *, resp);
1237 
1238         VN_RELE(vp);
1239 
             /* iovp is only non-NULL when rfs_read_alloc() filled it in. */
1240         if (iovp != NULL)
1241                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1242 
1243         return;
1244 
1245 out:
1246         if (curthread->t_flag & T_WOULDBLOCK) {
1247                 curthread->t_flag &= ~T_WOULDBLOCK;
1248                 resp->status = NFS3ERR_JUKEBOX;
1249         } else
1250                 resp->status = puterrno3(error);
1251 out1:
1252         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1253             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1254             READ3res *, resp);
1255 
1256         if (vp != NULL) {
1257                 if (need_rwunlock)
1258                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1259                 if (in_crit)
1260                         nbl_end_crit(vp);
1261                 VN_RELE(vp);
1262         }
1263         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1264 
1265         if (iovp != NULL)
1266                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1267 }
1268 
     /*
      * Release the data attached to a READ3 reply.  Only a reply whose
      * status is NFS3_OK is examined; if its resok.data.mp is non-NULL the
      * message is freed with freemsg().
      */
1269 void
1270 rfs3_read_free(READ3res *resp)
1271 {
1272         mblk_t *reply_mp;
1273 
1274         if (resp->status != NFS3_OK)
1275                 return;
1276         reply_mp = resp->resok.data.mp;
1277         if (reply_mp != NULL)
1278                 freemsg(reply_mp);
1279 }
1280 
     /*
      * Return a pointer to the file handle carried in the READ3 arguments
      * (the "file" member of *args).
      */
1281 void *
1282 rfs3_read_getfh(READ3args *args)
1283 {
1284         void *fhp = &args->file;
1285         return (fhp);
1286 }
1287 
1288 #define MAX_IOVECS      12      /* on-stack iovecs in rfs3_write() */
1289 
     /*
      * DEBUG-only counters: "hits" counts writes whose mblk chain fit in the
      * on-stack iovec array, "misses" those that needed a kmem_alloc()ed one.
      */
1290 #ifdef DEBUG
1291 static int rfs3_write_hits = 0;
1292 static int rfs3_write_misses = 0;
1293 #endif
1294 
     /*
      * Server routine to handle write.
      *
      * Writes args->count bytes at args->offset to the regular file named by
      * args->file and fills in *resp.  The data is taken from args->mblk,
      * args->rlist or args->data.data_val, checked in that order.
      *
      *   args - WRITE3 arguments (file handle, offset, count, stable, data)
      *   resp - reply; resok is filled in on NFS3_OK, resfail otherwise
      *   exi  - export information, passed to nfs3_fhtovp() and the label
      *          check
      *   req  - RPC request; supplies the client label on labeled systems
      *   cr   - credentials used for the attribute, access and write calls
      *   ro   - passed to rdonly(); a read-only result yields NFS3ERR_ROFS
      *
      * rwlock_ret stays -1 until VOP_RWLOCK() has been called without a
      * delegation conflict; the common exit code uses that to decide
      * whether VOP_RWUNLOCK() is needed.
      *
      * Exit paths:
      *   err  - failure with an errno in "error"; it is converted with
      *          puterrno3() unless the thread is flagged T_WOULDBLOCK, in
      *          which case NFS3ERR_JUKEBOX is returned.
      *   err1 - failure with resp->status already set; fills in the
      *          resfail wcc data.
      *   out  - common cleanup, also reached by the success paths.
      */
1295 void
1296 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1297     struct svc_req *req, cred_t *cr, bool_t ro)
1298 {
1299         nfs3_srv_t *ns;
1300         int error;
1301         vnode_t *vp;
1302         struct vattr *bvap = NULL;
1303         struct vattr bva;
1304         struct vattr *avap = NULL;
1305         struct vattr ava;
1306         u_offset_t rlimit;
1307         struct uio uio;
1308         struct iovec iov[MAX_IOVECS];
1309         mblk_t *m;
1310         struct iovec *iovp;
1311         int iovcnt;
1312         int ioflag;
1313         cred_t *savecred;
1314         int in_crit = 0;
1315         int rwlock_ret = -1;
1316         caller_context_t ct;
1317 
1318         vp = nfs3_fhtovp(&args->file, exi);
1319 
1320         DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1321             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1322             WRITE3args *, args);
1323 
1324         if (vp == NULL) {
1325                 error = ESTALE;
1326                 goto err;
1327         }
1328 
1329         ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
             /* Per-zone state; supplies the write verifier for the reply. */
1330         ns = zone_getspecific(rfs3_zone_key, curzone);
1331         if (is_system_labeled()) {
1332                 bslabel_t *clabel = req->rq_label;
1333 
1334                 ASSERT(clabel != NULL);
1335                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1336                     "got client label from request(1)", struct svc_req *, req);
1337 
                     /*
                      * Clients whose label equals admin_low skip the check;
                      * everyone else must pass the equality check.
                      */
1338                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1339                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1340                             exi)) {
1341                                 resp->status = NFS3ERR_ACCES;
1342                                 goto err1;
1343                         }
1344                 }
1345         }
1346 
             /* Caller context handed to every VOP call below. */
1347         ct.cc_sysid = 0;
1348         ct.cc_pid = 0;
1349         ct.cc_caller_id = nfs3_srv_caller_id;
1350         ct.cc_flags = CC_DONTBLOCK;
1351 
1352         /*
1353          * We have to enter the critical region before calling VOP_RWLOCK
1354          * to avoid a deadlock with ufs.
1355          */
1356         if (nbl_need_check(vp)) {
1357                 nbl_start_crit(vp, RW_READER);
1358                 in_crit = 1;
1359                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1360                     NULL)) {
1361                         error = EACCES;
1362                         goto err;
1363                 }
1364         }
1365 
1366         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1367 
1368         /* check if a monitor detected a delegation conflict */
1369         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1370                 resp->status = NFS3ERR_JUKEBOX;
                     /* Reset so the exit code does not call VOP_RWUNLOCK(). */
1371                 rwlock_ret = -1;
1372                 goto err1;
1373         }
1374 
1375 
1376         bva.va_mask = AT_ALL;
1377         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1378 
1379         /*
1380          * If we can't get the attributes, then we can't do the
1381          * right access checking.  So, we'll fail the request.
1382          */
1383         if (error)
1384                 goto err;
1385 
             /*
              * Until the write has happened the "after" attributes are the
              * same as the "before" ones; early failures report both.
              */
1386         bvap = &bva;
1387         avap = bvap;
1388 
1389         if (args->count != args->data.data_len) {
1390                 resp->status = NFS3ERR_INVAL;
1391                 goto err1;
1392         }
1393 
1394         if (rdonly(ro, vp)) {
1395                 resp->status = NFS3ERR_ROFS;
1396                 goto err1;
1397         }
1398 
1399         if (vp->v_type != VREG) {
1400                 resp->status = NFS3ERR_INVAL;
1401                 goto err1;
1402         }
1403 
             /* The file's owner is not access-checked; others need VWRITE. */
1404         if (crgetuid(cr) != bva.va_uid &&
1405             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1406                 goto err;
1407 
1408         if (MANDLOCK(vp, bva.va_mode)) {
1409                 resp->status = NFS3ERR_ACCES;
1410                 goto err1;
1411         }
1412 
             /* Zero-length write: succeed without calling VOP_WRITE(). */
1413         if (args->count == 0) {
1414                 resp->status = NFS3_OK;
1415                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1416                 resp->resok.count = 0;
1417                 resp->resok.committed = args->stable;
1418                 resp->resok.verf = ns->write3verf;
1419                 goto out;
1420         }
1421 
             /*
              * Describe the source data with an iovec array.  An mblk chain
              * gets one iovec per mblk, using the on-stack array when the
              * chain has at most MAX_IOVECS entries and a kmem_alloc()ed
              * array otherwise.  The rlist and data_val cases need a single
              * iovec.
              */
1422         if (args->mblk != NULL) {
1423                 iovcnt = 0;
1424                 for (m = args->mblk; m != NULL; m = m->b_cont)
1425                         iovcnt++;
1426                 if (iovcnt <= MAX_IOVECS) {
1427 #ifdef DEBUG
1428                         rfs3_write_hits++;
1429 #endif
1430                         iovp = iov;
1431                 } else {
1432 #ifdef DEBUG
1433                         rfs3_write_misses++;
1434 #endif
1435                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1436                 }
1437                 mblk_to_iov(args->mblk, iovcnt, iovp);
1438 
1439         } else if (args->rlist != NULL) {
1440                 iovcnt = 1;
1441                 iovp = iov;
1442                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1443                 iovp->iov_len = args->count;
1444         } else {
1445                 iovcnt = 1;
1446                 iovp = iov;
1447                 iovp->iov_base = args->data.data_val;
1448                 iovp->iov_len = args->count;
1449         }
1450 
1451         uio.uio_iov = iovp;
1452         uio.uio_iovcnt = iovcnt;
1453 
1454         uio.uio_segflg = UIO_SYSSPACE;
1455         uio.uio_extflg = UIO_COPY_DEFAULT;
1456         uio.uio_loffset = args->offset;
1457         uio.uio_resid = args->count;
             /*
              * Clip the transfer so it does not run past the process file
              * size limit taken from curproc->p_fsz_ctl.
              */
1458         uio.uio_llimit = curproc->p_fsz_ctl;
1459         rlimit = uio.uio_llimit - args->offset;
1460         if (rlimit < (u_offset_t)uio.uio_resid)
1461                 uio.uio_resid = (int)rlimit;
1462 
             /* Map the requested stability level to VOP_WRITE() ioflags. */
1463         if (args->stable == UNSTABLE)
1464                 ioflag = 0;
1465         else if (args->stable == FILE_SYNC)
1466                 ioflag = FSYNC;
1467         else if (args->stable == DATA_SYNC)
1468                 ioflag = FDSYNC;
1469         else {
                     /* Unknown stability level; free any heap iovec first. */
1470                 if (iovp != iov)
1471                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1472                 resp->status = NFS3ERR_INVAL;
1473                 goto err1;
1474         }
1475 
1476         /*
1477          * We're changing creds because VM may fault and we need
1478          * the cred of the current thread to be used if quota
1479          * checking is enabled.
1480          */
1481         savecred = curthread->t_cred;
1482         curthread->t_cred = cr;
1483         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1484         curthread->t_cred = savecred;
1485 
             /* The iovec array is no longer needed once the write returns. */
1486         if (iovp != iov)
1487                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1488 
1489         /* check if a monitor detected a delegation conflict */
1490         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1491                 resp->status = NFS3ERR_JUKEBOX;
1492                 goto err1;
1493         }
1494 
             /*
              * Fetch the "after" attributes before looking at the write
              * error so that a failed write still reports them.
              */
1495         ava.va_mask = AT_ALL;
1496         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1497 
1498         if (error)
1499                 goto err;
1500 
1501         /*
1502          * If we were unable to get the V_WRITELOCK_TRUE, then we
1503          * may not have accurate after attrs, so check if
1504          * we have both attributes, they have a non-zero va_seq, and
1505          * va_seq has changed by exactly one,
1506          * if not, turn off the before attr.
1507          */
1508         if (rwlock_ret != V_WRITELOCK_TRUE) {
1509                 if (bvap == NULL || avap == NULL ||
1510                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1511                     avap->va_seq != (bvap->va_seq + 1)) {
1512                         bvap = NULL;
1513                 }
1514         }
1515 
1516         resp->status = NFS3_OK;
1517         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
             /* Bytes actually written: requested count minus what is left. */
1518         resp->resok.count = args->count - uio.uio_resid;
1519         resp->resok.committed = args->stable;
1520         resp->resok.verf = ns->write3verf;
1521         goto out;
1522 
1523 err:
1524         if (curthread->t_flag & T_WOULDBLOCK) {
1525                 curthread->t_flag &= ~T_WOULDBLOCK;
1526                 resp->status = NFS3ERR_JUKEBOX;
1527         } else
1528                 resp->status = puterrno3(error);
1529 err1:
1530         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1531 out:
1532         DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1533             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1534             WRITE3res *, resp);
1535 
1536         if (vp != NULL) {
                     /* rwlock_ret is -1 when there is no lock to release. */
1537                 if (rwlock_ret != -1)
1538                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1539                 if (in_crit)
1540                         nbl_end_crit(vp);
1541                 VN_RELE(vp);
1542         }
1543 }
1544 
     /*
      * Return a pointer to the file handle carried in the WRITE3 arguments
      * (the "file" member of *args).
      */
1545 void *
1546 rfs3_write_getfh(WRITE3args *args)
1547 {
1548         void *fhp = &args->file;
1549         return (fhp);
1550 }
1551 
1552 void
1553 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1554     struct svc_req *req, cred_t *cr, bool_t ro)
1555 {
1556         int error;
1557         int in_crit = 0;
1558         vnode_t *vp;
1559         vnode_t *tvp = NULL;
1560         vnode_t *dvp;
1561         struct vattr *vap;
1562         struct vattr va;
1563         struct vattr *dbvap;
1564         struct vattr dbva;
1565         struct vattr *davap;
1566         struct vattr dava;
1567         enum vcexcl excl;
1568         nfstime3 *mtime;
1569         len_t reqsize;
1570         bool_t trunc;
1571         struct sockaddr *ca;
1572         char *name = NULL;
1573 
1574         dbvap = NULL;
1575         davap = NULL;
1576 
1577         dvp = nfs3_fhtovp(&args->where.dir, exi);
1578 
1579         DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1580             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1581             CREATE3args *, args);
1582 
1583         if (dvp == NULL) {
1584                 error = ESTALE;
1585                 goto out;
1586         }
1587 
1588         dbva.va_mask = AT_ALL;
1589         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1590         davap = dbvap;
1591 
1592         if (args->where.name == nfs3nametoolong) {
1593                 resp->status = NFS3ERR_NAMETOOLONG;
1594                 goto out1;
1595         }
1596 
1597         if (args->where.name == NULL || *(args->where.name) == '\0') {
1598                 resp->status = NFS3ERR_ACCES;
1599                 goto out1;
1600         }
1601 
1602         if (rdonly(ro, dvp)) {
1603                 resp->status = NFS3ERR_ROFS;
1604                 goto out1;
1605         }
1606 
1607         if (is_system_labeled()) {
1608                 bslabel_t *clabel = req->rq_label;
1609 
1610                 ASSERT(clabel != NULL);
1611                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1612                     "got client label from request(1)", struct svc_req *, req);
1613 
1614                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1615                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1616                             exi)) {
1617                                 resp->status = NFS3ERR_ACCES;
1618                                 goto out1;
1619                         }
1620                 }
1621         }
1622 
1623         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1624         name = nfscmd_convname(ca, exi, args->where.name,
1625             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1626 
1627         if (name == NULL) {
1628                 /* This is really a Solaris EILSEQ */
1629                 resp->status = NFS3ERR_INVAL;
1630                 goto out1;
1631         }
1632 
1633         if (args->how.mode == EXCLUSIVE) {
1634                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1635                 va.va_type = VREG;
1636                 va.va_mode = (mode_t)0;
1637                 /*
1638                  * Ensure no time overflows and that types match
1639                  */
1640                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1641                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1642                 va.va_mtime.tv_nsec = mtime->nseconds;
1643                 excl = EXCL;
1644         } else {
1645                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1646                     &va);
1647                 if (error)
1648                         goto out;
1649                 va.va_mask |= AT_TYPE;
1650                 va.va_type = VREG;
1651                 if (args->how.mode == GUARDED)
1652                         excl = EXCL;
1653                 else {
1654                         excl = NONEXCL;
1655 
1656                         /*
1657                          * During creation of file in non-exclusive mode
1658                          * if size of file is being set then make sure
1659                          * that if the file already exists that no conflicting
1660                          * non-blocking mandatory locks exists in the region
1661                          * being modified. If there are conflicting locks fail
1662                          * the operation with EACCES.
1663                          */
1664                         if (va.va_mask & AT_SIZE) {
1665                                 struct vattr tva;
1666 
1667                                 /*
1668                                  * Does file already exist?
1669                                  */
1670                                 error = VOP_LOOKUP(dvp, name, &tvp,
1671                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1672 
1673                                 /*
1674                                  * Check to see if the file has been delegated
1675                                  * to a v4 client.  If so, then begin recall of
1676                                  * the delegation and return JUKEBOX to allow
1677                                  * the client to retrasmit its request.
1678                                  */
1679 
1680                                 trunc = va.va_size == 0;
1681                                 if (!error &&
1682                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1683                                         resp->status = NFS3ERR_JUKEBOX;
1684                                         goto out1;
1685                                 }
1686 
1687                                 /*
1688                                  * Check for NBMAND lock conflicts
1689                                  */
1690                                 if (!error && nbl_need_check(tvp)) {
1691                                         u_offset_t offset;
1692                                         ssize_t len;
1693 
1694                                         nbl_start_crit(tvp, RW_READER);
1695                                         in_crit = 1;
1696 
1697                                         tva.va_mask = AT_SIZE;
1698                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1699                                             NULL);
1700                                         /*
1701                                          * Can't check for conflicts, so return
1702                                          * error.
1703                                          */
1704                                         if (error)
1705                                                 goto out;
1706 
1707                                         offset = tva.va_size < va.va_size ?
1708                                             tva.va_size : va.va_size;
1709                                         len = tva.va_size < va.va_size ?
1710                                             va.va_size - tva.va_size :
1711                                             tva.va_size - va.va_size;
1712                                         if (nbl_conflict(tvp, NBL_WRITE,
1713                                             offset, len, 0, NULL)) {
1714                                                 error = EACCES;
1715                                                 goto out;
1716                                         }
1717                                 } else if (tvp) {
1718                                         VN_RELE(tvp);
1719                                         tvp = NULL;
1720                                 }
1721                         }
1722                 }
1723                 if (va.va_mask & AT_SIZE)
1724                         reqsize = va.va_size;
1725         }
1726 
1727         /*
1728          * Must specify the mode.
1729          */
1730         if (!(va.va_mask & AT_MODE)) {
1731                 resp->status = NFS3ERR_INVAL;
1732                 goto out1;
1733         }
1734 
1735         /*
1736          * If the filesystem is exported with nosuid, then mask off
1737          * the setuid and setgid bits.
1738          */
1739         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1740                 va.va_mode &= ~(VSUID | VSGID);
1741 
1742 tryagain:
1743         /*
1744          * The file open mode used is VWRITE.  If the client needs
1745          * some other semantic, then it should do the access checking
1746          * itself.  It would have been nice to have the file open mode
1747          * passed as part of the arguments.
1748          */
1749         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1750             &vp, cr, 0, NULL, NULL);
1751 
1752         dava.va_mask = AT_ALL;
1753         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1754 
1755         if (error) {
1756                 /*
1757                  * If we got something other than file already exists
1758                  * then just return this error.  Otherwise, we got
1759                  * EEXIST.  If we were doing a GUARDED create, then
1760                  * just return this error.  Otherwise, we need to
1761                  * make sure that this wasn't a duplicate of an
1762                  * exclusive create request.
1763                  *
1764                  * The assumption is made that a non-exclusive create
1765                  * request will never return EEXIST.
1766                  */
1767                 if (error != EEXIST || args->how.mode == GUARDED)
1768                         goto out;
1769                 /*
1770                  * Lookup the file so that we can get a vnode for it.
1771                  */
1772                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1773                     NULL, cr, NULL, NULL, NULL);
1774                 if (error) {
1775                         /*
1776                          * We couldn't find the file that we thought that
1777                          * we just created.  So, we'll just try creating
1778                          * it again.
1779                          */
1780                         if (error == ENOENT)
1781                                 goto tryagain;
1782                         goto out;
1783                 }
1784 
1785                 /*
1786                  * If the file is delegated to a v4 client, go ahead
1787                  * and initiate recall, this create is a hint that a
1788                  * conflicting v3 open has occurred.
1789                  */
1790 
1791                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1792                         VN_RELE(vp);
1793                         resp->status = NFS3ERR_JUKEBOX;
1794                         goto out1;
1795                 }
1796 
1797                 va.va_mask = AT_ALL;
1798                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1799 
1800                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1801                 /* % with INT32_MAX to prevent overflows */
1802                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1803                     vap->va_mtime.tv_sec !=
1804                     (mtime->seconds % INT32_MAX) ||
1805                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1806                         VN_RELE(vp);
1807                         error = EEXIST;
1808                         goto out;
1809                 }
1810         } else {
1811 
1812                 if ((args->how.mode == UNCHECKED ||
1813                     args->how.mode == GUARDED) &&
1814                     args->how.createhow3_u.obj_attributes.size.set_it &&
1815                     va.va_size == 0)
1816                         trunc = TRUE;
1817                 else
1818                         trunc = FALSE;
1819 
1820                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1821                         VN_RELE(vp);
1822                         resp->status = NFS3ERR_JUKEBOX;
1823                         goto out1;
1824                 }
1825 
1826                 va.va_mask = AT_ALL;
1827                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1828 
1829                 /*
1830                  * We need to check to make sure that the file got
1831                  * created to the indicated size.  If not, we do a
1832                  * setattr to try to change the size, but we don't
1833                  * try too hard.  This shouldn't be a problem as most
1834                  * clients will only specify a size of zero which
1835                  * local file systems handle.  However, even if
1836                  * the client does specify a non-zero size, it can
1837                  * still recover by checking the size of the file
1838                  * after it has created it and then issue a setattr
1839                  * request of its own to set the size of the file.
1840                  */
1841                 if (vap != NULL &&
1842                     (args->how.mode == UNCHECKED ||
1843                     args->how.mode == GUARDED) &&
1844                     args->how.createhow3_u.obj_attributes.size.set_it &&
1845                     vap->va_size != reqsize) {
1846                         va.va_mask = AT_SIZE;
1847                         va.va_size = reqsize;
1848                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1849                         va.va_mask = AT_ALL;
1850                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1851                 }
1852         }
1853 
1854         if (name != args->where.name)
1855                 kmem_free(name, MAXPATHLEN + 1);
1856 
1857         error = makefh3(&resp->resok.obj.handle, vp, exi);
1858         if (error)
1859                 resp->resok.obj.handle_follows = FALSE;
1860         else
1861                 resp->resok.obj.handle_follows = TRUE;
1862 
1863         /*
1864          * Force modified data and metadata out to stable storage.
1865          */
1866         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1867         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1868 
1869         VN_RELE(vp);
1870         if (tvp != NULL) {
1871                 if (in_crit)
1872                         nbl_end_crit(tvp);
1873                 VN_RELE(tvp);
1874         }
1875 
1876         resp->status = NFS3_OK;
1877         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1878         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1879 
1880         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1881             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1882             CREATE3res *, resp);
1883 
1884         VN_RELE(dvp);
1885         return;
1886 
1887 out:
1888         if (curthread->t_flag & T_WOULDBLOCK) {
1889                 curthread->t_flag &= ~T_WOULDBLOCK;
1890                 resp->status = NFS3ERR_JUKEBOX;
1891         } else
1892                 resp->status = puterrno3(error);
1893 out1:
1894         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1895             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1896             CREATE3res *, resp);
1897 
1898         if (name != NULL && name != args->where.name)
1899                 kmem_free(name, MAXPATHLEN + 1);
1900 
1901         if (tvp != NULL) {
1902                 if (in_crit)
1903                         nbl_end_crit(tvp);
1904                 VN_RELE(tvp);
1905         }
1906         if (dvp != NULL)
1907                 VN_RELE(dvp);
1908         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1909 }
1910 
1911 void *
1912 rfs3_create_getfh(CREATE3args *args)
1913 {
1914 
1915         return (&args->where.dir);
1916 }
1917 
1918 void
1919 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1920     struct svc_req *req, cred_t *cr, bool_t ro)
1921 {
1922         int error;
1923         vnode_t *vp = NULL;
1924         vnode_t *dvp;
1925         struct vattr *vap;
1926         struct vattr va;
1927         struct vattr *dbvap;
1928         struct vattr dbva;
1929         struct vattr *davap;
1930         struct vattr dava;
1931         struct sockaddr *ca;
1932         char *name = NULL;
1933 
1934         dbvap = NULL;
1935         davap = NULL;
1936 
1937         dvp = nfs3_fhtovp(&args->where.dir, exi);
1938 
1939         DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1940             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1941             MKDIR3args *, args);
1942 
1943         if (dvp == NULL) {
1944                 error = ESTALE;
1945                 goto out;
1946         }
1947 
1948         dbva.va_mask = AT_ALL;
1949         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1950         davap = dbvap;
1951 
1952         if (args->where.name == nfs3nametoolong) {
1953                 resp->status = NFS3ERR_NAMETOOLONG;
1954                 goto out1;
1955         }
1956 
1957         if (args->where.name == NULL || *(args->where.name) == '\0') {
1958                 resp->status = NFS3ERR_ACCES;
1959                 goto out1;
1960         }
1961 
1962         if (rdonly(ro, dvp)) {
1963                 resp->status = NFS3ERR_ROFS;
1964                 goto out1;
1965         }
1966 
1967         if (is_system_labeled()) {
1968                 bslabel_t *clabel = req->rq_label;
1969 
1970                 ASSERT(clabel != NULL);
1971                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1972                     "got client label from request(1)", struct svc_req *, req);
1973 
1974                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1975                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1976                             exi)) {
1977                                 resp->status = NFS3ERR_ACCES;
1978                                 goto out1;
1979                         }
1980                 }
1981         }
1982 
1983         error = sattr3_to_vattr(&args->attributes, &va);
1984         if (error)
1985                 goto out;
1986 
1987         if (!(va.va_mask & AT_MODE)) {
1988                 resp->status = NFS3ERR_INVAL;
1989                 goto out1;
1990         }
1991 
1992         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1993         name = nfscmd_convname(ca, exi, args->where.name,
1994             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1995 
1996         if (name == NULL) {
1997                 resp->status = NFS3ERR_INVAL;
1998                 goto out1;
1999         }
2000 
2001         va.va_mask |= AT_TYPE;
2002         va.va_type = VDIR;
2003 
2004         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2005 
2006         if (name != args->where.name)
2007                 kmem_free(name, MAXPATHLEN + 1);
2008 
2009         dava.va_mask = AT_ALL;
2010         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2011 
2012         /*
2013          * Force modified data and metadata out to stable storage.
2014          */
2015         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2016 
2017         if (error)
2018                 goto out;
2019 
2020         error = makefh3(&resp->resok.obj.handle, vp, exi);
2021         if (error)
2022                 resp->resok.obj.handle_follows = FALSE;
2023         else
2024                 resp->resok.obj.handle_follows = TRUE;
2025 
2026         va.va_mask = AT_ALL;
2027         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2028 
2029         /*
2030          * Force modified data and metadata out to stable storage.
2031          */
2032         (void) VOP_FSYNC(vp, 0, cr, NULL);
2033 
2034         VN_RELE(vp);
2035 
2036         resp->status = NFS3_OK;
2037         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2038         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2039 
2040         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2041             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2042             MKDIR3res *, resp);
2043         VN_RELE(dvp);
2044 
2045         return;
2046 
2047 out:
2048         if (curthread->t_flag & T_WOULDBLOCK) {
2049                 curthread->t_flag &= ~T_WOULDBLOCK;
2050                 resp->status = NFS3ERR_JUKEBOX;
2051         } else
2052                 resp->status = puterrno3(error);
2053 out1:
2054         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2055             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2056             MKDIR3res *, resp);
2057         if (dvp != NULL)
2058                 VN_RELE(dvp);
2059         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2060 }
2061 
2062 void *
2063 rfs3_mkdir_getfh(MKDIR3args *args)
2064 {
2065 
2066         return (&args->where.dir);
2067 }
2068 
2069 void
2070 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2071     struct svc_req *req, cred_t *cr, bool_t ro)
2072 {
2073         int error;
2074         vnode_t *vp;
2075         vnode_t *dvp;
2076         struct vattr *vap;
2077         struct vattr va;
2078         struct vattr *dbvap;
2079         struct vattr dbva;
2080         struct vattr *davap;
2081         struct vattr dava;
2082         struct sockaddr *ca;
2083         char *name = NULL;
2084         char *symdata = NULL;
2085 
2086         dbvap = NULL;
2087         davap = NULL;
2088 
2089         dvp = nfs3_fhtovp(&args->where.dir, exi);
2090 
2091         DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2092             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2093             SYMLINK3args *, args);
2094 
2095         if (dvp == NULL) {
2096                 error = ESTALE;
2097                 goto err;
2098         }
2099 
2100         dbva.va_mask = AT_ALL;
2101         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2102         davap = dbvap;
2103 
2104         if (args->where.name == nfs3nametoolong) {
2105                 resp->status = NFS3ERR_NAMETOOLONG;
2106                 goto err1;
2107         }
2108 
2109         if (args->where.name == NULL || *(args->where.name) == '\0') {
2110                 resp->status = NFS3ERR_ACCES;
2111                 goto err1;
2112         }
2113 
2114         if (rdonly(ro, dvp)) {
2115                 resp->status = NFS3ERR_ROFS;
2116                 goto err1;
2117         }
2118 
2119         if (is_system_labeled()) {
2120                 bslabel_t *clabel = req->rq_label;
2121 
2122                 ASSERT(clabel != NULL);
2123                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2124                     "got client label from request(1)", struct svc_req *, req);
2125 
2126                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2127                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2128                             exi)) {
2129                                 resp->status = NFS3ERR_ACCES;
2130                                 goto err1;
2131                         }
2132                 }
2133         }
2134 
2135         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2136         if (error)
2137                 goto err;
2138 
2139         if (!(va.va_mask & AT_MODE)) {
2140                 resp->status = NFS3ERR_INVAL;
2141                 goto err1;
2142         }
2143 
2144         if (args->symlink.symlink_data == nfs3nametoolong) {
2145                 resp->status = NFS3ERR_NAMETOOLONG;
2146                 goto err1;
2147         }
2148 
2149         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2150         name = nfscmd_convname(ca, exi, args->where.name,
2151             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2152 
2153         if (name == NULL) {
2154                 /* This is really a Solaris EILSEQ */
2155                 resp->status = NFS3ERR_INVAL;
2156                 goto err1;
2157         }
2158 
2159         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2160             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2161         if (symdata == NULL) {
2162                 /* This is really a Solaris EILSEQ */
2163                 resp->status = NFS3ERR_INVAL;
2164                 goto err1;
2165         }
2166 
2167 
2168         va.va_mask |= AT_TYPE;
2169         va.va_type = VLNK;
2170 
2171         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2172 
2173         dava.va_mask = AT_ALL;
2174         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2175 
2176         if (error)
2177                 goto err;
2178 
2179         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2180             NULL, NULL, NULL);
2181 
2182         /*
2183          * Force modified data and metadata out to stable storage.
2184          */
2185         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2186 
2187 
2188         resp->status = NFS3_OK;
2189         if (error) {
2190                 resp->resok.obj.handle_follows = FALSE;
2191                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2192                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2193                 goto out;
2194         }
2195 
2196         error = makefh3(&resp->resok.obj.handle, vp, exi);
2197         if (error)
2198                 resp->resok.obj.handle_follows = FALSE;
2199         else
2200                 resp->resok.obj.handle_follows = TRUE;
2201 
2202         va.va_mask = AT_ALL;
2203         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2204 
2205         /*
2206          * Force modified data and metadata out to stable storage.
2207          */
2208         (void) VOP_FSYNC(vp, 0, cr, NULL);
2209 
2210         VN_RELE(vp);
2211 
2212         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2213         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2214         goto out;
2215 
2216 err:
2217         if (curthread->t_flag & T_WOULDBLOCK) {
2218                 curthread->t_flag &= ~T_WOULDBLOCK;
2219                 resp->status = NFS3ERR_JUKEBOX;
2220         } else
2221                 resp->status = puterrno3(error);
2222 err1:
2223         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2224 out:
2225         if (name != NULL && name != args->where.name)
2226                 kmem_free(name, MAXPATHLEN + 1);
2227         if (symdata != NULL && symdata != args->symlink.symlink_data)
2228                 kmem_free(symdata, MAXPATHLEN + 1);
2229 
2230         DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2231             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2232             SYMLINK3res *, resp);
2233 
2234         if (dvp != NULL)
2235                 VN_RELE(dvp);
2236 }
2237 
2238 void *
2239 rfs3_symlink_getfh(SYMLINK3args *args)
2240 {
2241 
2242         return (&args->where.dir);
2243 }
2244 
2245 void
2246 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2247     struct svc_req *req, cred_t *cr, bool_t ro)
2248 {
2249         int error;
2250         vnode_t *vp;
2251         vnode_t *realvp;
2252         vnode_t *dvp;
2253         struct vattr *vap;
2254         struct vattr va;
2255         struct vattr *dbvap;
2256         struct vattr dbva;
2257         struct vattr *davap;
2258         struct vattr dava;
2259         int mode;
2260         enum vcexcl excl;
2261         struct sockaddr *ca;
2262         char *name = NULL;
2263 
2264         dbvap = NULL;
2265         davap = NULL;
2266 
2267         dvp = nfs3_fhtovp(&args->where.dir, exi);
2268 
2269         DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2270             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2271             MKNOD3args *, args);
2272 
2273         if (dvp == NULL) {
2274                 error = ESTALE;
2275                 goto out;
2276         }
2277 
2278         dbva.va_mask = AT_ALL;
2279         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2280         davap = dbvap;
2281 
2282         if (args->where.name == nfs3nametoolong) {
2283                 resp->status = NFS3ERR_NAMETOOLONG;
2284                 goto out1;
2285         }
2286 
2287         if (args->where.name == NULL || *(args->where.name) == '\0') {
2288                 resp->status = NFS3ERR_ACCES;
2289                 goto out1;
2290         }
2291 
2292         if (rdonly(ro, dvp)) {
2293                 resp->status = NFS3ERR_ROFS;
2294                 goto out1;
2295         }
2296 
2297         if (is_system_labeled()) {
2298                 bslabel_t *clabel = req->rq_label;
2299 
2300                 ASSERT(clabel != NULL);
2301                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2302                     "got client label from request(1)", struct svc_req *, req);
2303 
2304                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2305                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2306                             exi)) {
2307                                 resp->status = NFS3ERR_ACCES;
2308                                 goto out1;
2309                         }
2310                 }
2311         }
2312 
2313         switch (args->what.type) {
2314         case NF3CHR:
2315         case NF3BLK:
2316                 error = sattr3_to_vattr(
2317                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2318                 if (error)
2319                         goto out;
2320                 if (secpolicy_sys_devices(cr) != 0) {
2321                         resp->status = NFS3ERR_PERM;
2322                         goto out1;
2323                 }
2324                 if (args->what.type == NF3CHR)
2325                         va.va_type = VCHR;
2326                 else
2327                         va.va_type = VBLK;
2328                 va.va_rdev = makedevice(
2329                     args->what.mknoddata3_u.device.spec.specdata1,
2330                     args->what.mknoddata3_u.device.spec.specdata2);
2331                 va.va_mask |= AT_TYPE | AT_RDEV;
2332                 break;
2333         case NF3SOCK:
2334                 error = sattr3_to_vattr(
2335                     &args->what.mknoddata3_u.pipe_attributes, &va);
2336                 if (error)
2337                         goto out;
2338                 va.va_type = VSOCK;
2339                 va.va_mask |= AT_TYPE;
2340                 break;
2341         case NF3FIFO:
2342                 error = sattr3_to_vattr(
2343                     &args->what.mknoddata3_u.pipe_attributes, &va);
2344                 if (error)
2345                         goto out;
2346                 va.va_type = VFIFO;
2347                 va.va_mask |= AT_TYPE;
2348                 break;
2349         default:
2350                 resp->status = NFS3ERR_BADTYPE;
2351                 goto out1;
2352         }
2353 
2354         /*
2355          * Must specify the mode.
2356          */
2357         if (!(va.va_mask & AT_MODE)) {
2358                 resp->status = NFS3ERR_INVAL;
2359                 goto out1;
2360         }
2361 
2362         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2363         name = nfscmd_convname(ca, exi, args->where.name,
2364             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2365 
2366         if (name == NULL) {
2367                 resp->status = NFS3ERR_INVAL;
2368                 goto out1;
2369         }
2370 
2371         excl = EXCL;
2372 
2373         mode = 0;
2374 
2375         error = VOP_CREATE(dvp, name, &va, excl, mode,
2376             &vp, cr, 0, NULL, NULL);
2377 
2378         if (name != args->where.name)
2379                 kmem_free(name, MAXPATHLEN + 1);
2380 
2381         dava.va_mask = AT_ALL;
2382         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2383 
2384         /*
2385          * Force modified data and metadata out to stable storage.
2386          */
2387         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2388 
2389         if (error)
2390                 goto out;
2391 
2392         resp->status = NFS3_OK;
2393 
2394         error = makefh3(&resp->resok.obj.handle, vp, exi);
2395         if (error)
2396                 resp->resok.obj.handle_follows = FALSE;
2397         else
2398                 resp->resok.obj.handle_follows = TRUE;
2399 
2400         va.va_mask = AT_ALL;
2401         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2402 
2403         /*
2404          * Force modified metadata out to stable storage.
2405          *
2406          * if a underlying vp exists, pass it to VOP_FSYNC
2407          */
2408         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2409                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2410         else
2411                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2412 
2413         VN_RELE(vp);
2414 
2415         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2416         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2417         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2418             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2419             MKNOD3res *, resp);
2420         VN_RELE(dvp);
2421         return;
2422 
2423 out:
2424         if (curthread->t_flag & T_WOULDBLOCK) {
2425                 curthread->t_flag &= ~T_WOULDBLOCK;
2426                 resp->status = NFS3ERR_JUKEBOX;
2427         } else
2428                 resp->status = puterrno3(error);
2429 out1:
2430         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2431             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2432             MKNOD3res *, resp);
2433         if (dvp != NULL)
2434                 VN_RELE(dvp);
2435         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2436 }
2437 
2438 void *
2439 rfs3_mknod_getfh(MKNOD3args *args)
2440 {
2441 
2442         return (&args->where.dir);
2443 }
2444 
2445 void
2446 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2447     struct svc_req *req, cred_t *cr, bool_t ro)
2448 {
2449         int error = 0;
2450         vnode_t *vp;
2451         struct vattr *bvap;
2452         struct vattr bva;
2453         struct vattr *avap;
2454         struct vattr ava;
2455         vnode_t *targvp = NULL;
2456         struct sockaddr *ca;
2457         char *name = NULL;
2458 
2459         bvap = NULL;
2460         avap = NULL;
2461 
2462         vp = nfs3_fhtovp(&args->object.dir, exi);
2463 
2464         DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2465             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2466             REMOVE3args *, args);
2467 
2468         if (vp == NULL) {
2469                 error = ESTALE;
2470                 goto err;
2471         }
2472 
2473         bva.va_mask = AT_ALL;
2474         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2475         avap = bvap;
2476 
2477         if (vp->v_type != VDIR) {
2478                 resp->status = NFS3ERR_NOTDIR;
2479                 goto err1;
2480         }
2481 
2482         if (args->object.name == nfs3nametoolong) {
2483                 resp->status = NFS3ERR_NAMETOOLONG;
2484                 goto err1;
2485         }
2486 
2487         if (args->object.name == NULL || *(args->object.name) == '\0') {
2488                 resp->status = NFS3ERR_ACCES;
2489                 goto err1;
2490         }
2491 
2492         if (rdonly(ro, vp)) {
2493                 resp->status = NFS3ERR_ROFS;
2494                 goto err1;
2495         }
2496 
2497         if (is_system_labeled()) {
2498                 bslabel_t *clabel = req->rq_label;
2499 
2500                 ASSERT(clabel != NULL);
2501                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2502                     "got client label from request(1)", struct svc_req *, req);
2503 
2504                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2505                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2506                             exi)) {
2507                                 resp->status = NFS3ERR_ACCES;
2508                                 goto err1;
2509                         }
2510                 }
2511         }
2512 
2513         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2514         name = nfscmd_convname(ca, exi, args->object.name,
2515             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2516 
2517         if (name == NULL) {
2518                 resp->status = NFS3ERR_INVAL;
2519                 goto err1;
2520         }
2521 
2522         /*
2523          * Check for a conflict with a non-blocking mandatory share
2524          * reservation and V4 delegations
2525          */
2526         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2527             NULL, cr, NULL, NULL, NULL);
2528         if (error != 0)
2529                 goto err;
2530 
2531         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2532                 resp->status = NFS3ERR_JUKEBOX;
2533                 goto err1;
2534         }
2535 
2536         if (!nbl_need_check(targvp)) {
2537                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2538         } else {
2539                 nbl_start_crit(targvp, RW_READER);
2540                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2541                         error = EACCES;
2542                 } else {
2543                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2544                 }
2545                 nbl_end_crit(targvp);
2546         }
2547         VN_RELE(targvp);
2548         targvp = NULL;
2549 
2550         ava.va_mask = AT_ALL;
2551         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2552 
2553         /*
2554          * Force modified data and metadata out to stable storage.
2555          */
2556         (void) VOP_FSYNC(vp, 0, cr, NULL);
2557 
2558         if (error)
2559                 goto err;
2560 
2561         resp->status = NFS3_OK;
2562         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2563         goto out;
2564 
2565 err:
2566         if (curthread->t_flag & T_WOULDBLOCK) {
2567                 curthread->t_flag &= ~T_WOULDBLOCK;
2568                 resp->status = NFS3ERR_JUKEBOX;
2569         } else
2570                 resp->status = puterrno3(error);
2571 err1:
2572         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2573 out:
2574         DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2575             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2576             REMOVE3res *, resp);
2577 
2578         if (name != NULL && name != args->object.name)
2579                 kmem_free(name, MAXPATHLEN + 1);
2580 
2581         if (vp != NULL)
2582                 VN_RELE(vp);
2583 }
2584 
2585 void *
2586 rfs3_remove_getfh(REMOVE3args *args)
2587 {
2588 
2589         return (&args->object.dir);
2590 }
2591 
2592 void
2593 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2594     struct svc_req *req, cred_t *cr, bool_t ro)
2595 {
2596         int error;
2597         vnode_t *vp;
2598         struct vattr *bvap;
2599         struct vattr bva;
2600         struct vattr *avap;
2601         struct vattr ava;
2602         struct sockaddr *ca;
2603         char *name = NULL;
2604 
2605         bvap = NULL;
2606         avap = NULL;
2607 
2608         vp = nfs3_fhtovp(&args->object.dir, exi);
2609 
2610         DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2611             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2612             RMDIR3args *, args);
2613 
2614         if (vp == NULL) {
2615                 error = ESTALE;
2616                 goto err;
2617         }
2618 
2619         bva.va_mask = AT_ALL;
2620         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2621         avap = bvap;
2622 
2623         if (vp->v_type != VDIR) {
2624                 resp->status = NFS3ERR_NOTDIR;
2625                 goto err1;
2626         }
2627 
2628         if (args->object.name == nfs3nametoolong) {
2629                 resp->status = NFS3ERR_NAMETOOLONG;
2630                 goto err1;
2631         }
2632 
2633         if (args->object.name == NULL || *(args->object.name) == '\0') {
2634                 resp->status = NFS3ERR_ACCES;
2635                 goto err1;
2636         }
2637 
2638         if (rdonly(ro, vp)) {
2639                 resp->status = NFS3ERR_ROFS;
2640                 goto err1;
2641         }
2642 
2643         if (is_system_labeled()) {
2644                 bslabel_t *clabel = req->rq_label;
2645 
2646                 ASSERT(clabel != NULL);
2647                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2648                     "got client label from request(1)", struct svc_req *, req);
2649 
2650                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2651                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2652                             exi)) {
2653                                 resp->status = NFS3ERR_ACCES;
2654                                 goto err1;
2655                         }
2656                 }
2657         }
2658 
2659         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2660         name = nfscmd_convname(ca, exi, args->object.name,
2661             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2662 
2663         if (name == NULL) {
2664                 resp->status = NFS3ERR_INVAL;
2665                 goto err1;
2666         }
2667 
2668         error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2669 
2670         if (name != args->object.name)
2671                 kmem_free(name, MAXPATHLEN + 1);
2672 
2673         ava.va_mask = AT_ALL;
2674         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2675 
2676         /*
2677          * Force modified data and metadata out to stable storage.
2678          */
2679         (void) VOP_FSYNC(vp, 0, cr, NULL);
2680 
2681         if (error) {
2682                 /*
2683                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2684                  * if the directory is not empty.  A System V NFS server
2685                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2686                  * over the wire.
2687                  */
2688                 if (error == EEXIST)
2689                         error = ENOTEMPTY;
2690                 goto err;
2691         }
2692 
2693         resp->status = NFS3_OK;
2694         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2695         goto out;
2696 
2697 err:
2698         if (curthread->t_flag & T_WOULDBLOCK) {
2699                 curthread->t_flag &= ~T_WOULDBLOCK;
2700                 resp->status = NFS3ERR_JUKEBOX;
2701         } else
2702                 resp->status = puterrno3(error);
2703 err1:
2704         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2705 out:
2706         DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2707             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2708             RMDIR3res *, resp);
2709         if (vp != NULL)
2710                 VN_RELE(vp);
2711 
2712 }
2713 
2714 void *
2715 rfs3_rmdir_getfh(RMDIR3args *args)
2716 {
2717 
2718         return (&args->object.dir);
2719 }
2720 
2721 void
2722 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2723     struct svc_req *req, cred_t *cr, bool_t ro)
2724 {
2725         int error = 0;
2726         vnode_t *fvp;
2727         vnode_t *tvp;
2728         vnode_t *targvp;
2729         struct vattr *fbvap;
2730         struct vattr fbva;
2731         struct vattr *favap;
2732         struct vattr fava;
2733         struct vattr *tbvap;
2734         struct vattr tbva;
2735         struct vattr *tavap;
2736         struct vattr tava;
2737         nfs_fh3 *fh3;
2738         struct exportinfo *to_exi;
2739         vnode_t *srcvp = NULL;
2740         bslabel_t *clabel;
2741         struct sockaddr *ca;
2742         char *name = NULL;
2743         char *toname = NULL;
2744 
2745         fbvap = NULL;
2746         favap = NULL;
2747         tbvap = NULL;
2748         tavap = NULL;
2749         tvp = NULL;
2750 
2751         fvp = nfs3_fhtovp(&args->from.dir, exi);
2752 
2753         DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2754             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2755             RENAME3args *, args);
2756 
2757         if (fvp == NULL) {
2758                 error = ESTALE;
2759                 goto err;
2760         }
2761 
2762         if (is_system_labeled()) {
2763                 clabel = req->rq_label;
2764                 ASSERT(clabel != NULL);
2765                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2766                     "got client label from request(1)", struct svc_req *, req);
2767 
2768                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2769                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2770                             exi)) {
2771                                 resp->status = NFS3ERR_ACCES;
2772                                 goto err1;
2773                         }
2774                 }
2775         }
2776 
2777         fbva.va_mask = AT_ALL;
2778         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2779         favap = fbvap;
2780 
2781         fh3 = &args->to.dir;
2782         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2783         if (to_exi == NULL) {
2784                 resp->status = NFS3ERR_ACCES;
2785                 goto err1;
2786         }
2787         exi_rele(to_exi);
2788 
2789         if (to_exi != exi) {
2790                 resp->status = NFS3ERR_XDEV;
2791                 goto err1;
2792         }
2793 
2794         tvp = nfs3_fhtovp(&args->to.dir, exi);
2795         if (tvp == NULL) {
2796                 error = ESTALE;
2797                 goto err;
2798         }
2799 
2800         tbva.va_mask = AT_ALL;
2801         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2802         tavap = tbvap;
2803 
2804         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2805                 resp->status = NFS3ERR_NOTDIR;
2806                 goto err1;
2807         }
2808 
2809         if (args->from.name == nfs3nametoolong ||
2810             args->to.name == nfs3nametoolong) {
2811                 resp->status = NFS3ERR_NAMETOOLONG;
2812                 goto err1;
2813         }
2814         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2815             args->to.name == NULL || *(args->to.name) == '\0') {
2816                 resp->status = NFS3ERR_ACCES;
2817                 goto err1;
2818         }
2819 
2820         if (rdonly(ro, tvp)) {
2821                 resp->status = NFS3ERR_ROFS;
2822                 goto err1;
2823         }
2824 
2825         if (is_system_labeled()) {
2826                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2827                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2828                             exi)) {
2829                                 resp->status = NFS3ERR_ACCES;
2830                                 goto err1;
2831                         }
2832                 }
2833         }
2834 
2835         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2836         name = nfscmd_convname(ca, exi, args->from.name,
2837             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2838 
2839         if (name == NULL) {
2840                 resp->status = NFS3ERR_INVAL;
2841                 goto err1;
2842         }
2843 
2844         toname = nfscmd_convname(ca, exi, args->to.name,
2845             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2846 
2847         if (toname == NULL) {
2848                 resp->status = NFS3ERR_INVAL;
2849                 goto err1;
2850         }
2851 
2852         /*
2853          * Check for a conflict with a non-blocking mandatory share
2854          * reservation or V4 delegations.
2855          */
2856         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2857             NULL, cr, NULL, NULL, NULL);
2858         if (error != 0)
2859                 goto err;
2860 
2861         /*
2862          * If we rename a delegated file we should recall the
2863          * delegation, since future opens should fail or would
2864          * refer to a new file.
2865          */
2866         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2867                 resp->status = NFS3ERR_JUKEBOX;
2868                 goto err1;
2869         }
2870 
2871         /*
2872          * Check for renaming over a delegated file.  Check nfs4_deleg_policy
2873          * first to avoid VOP_LOOKUP if possible.
2874          */
2875         if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2876             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2877             NULL, NULL, NULL) == 0) {
2878 
2879                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2880                         VN_RELE(targvp);
2881                         resp->status = NFS3ERR_JUKEBOX;
2882                         goto err1;
2883                 }
2884                 VN_RELE(targvp);
2885         }
2886 
2887         if (!nbl_need_check(srcvp)) {
2888                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2889         } else {
2890                 nbl_start_crit(srcvp, RW_READER);
2891                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2892                         error = EACCES;
2893                 else
2894                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2895                 nbl_end_crit(srcvp);
2896         }
2897         if (error == 0)
2898                 vn_renamepath(tvp, srcvp, args->to.name,
2899                     strlen(args->to.name));
2900         VN_RELE(srcvp);
2901         srcvp = NULL;
2902 
2903         fava.va_mask = AT_ALL;
2904         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2905         tava.va_mask = AT_ALL;
2906         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2907 
2908         /*
2909          * Force modified data and metadata out to stable storage.
2910          */
2911         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2912         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2913 
2914         if (error)
2915                 goto err;
2916 
2917         resp->status = NFS3_OK;
2918         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2919         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2920         goto out;
2921 
2922 err:
2923         if (curthread->t_flag & T_WOULDBLOCK) {
2924                 curthread->t_flag &= ~T_WOULDBLOCK;
2925                 resp->status = NFS3ERR_JUKEBOX;
2926         } else {
2927                 resp->status = puterrno3(error);
2928         }
2929 err1:
2930         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2931         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2932 
2933 out:
2934         if (name != NULL && name != args->from.name)
2935                 kmem_free(name, MAXPATHLEN + 1);
2936         if (toname != NULL && toname != args->to.name)
2937                 kmem_free(toname, MAXPATHLEN + 1);
2938 
2939         DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2940             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2941             RENAME3res *, resp);
2942         if (fvp != NULL)
2943                 VN_RELE(fvp);
2944         if (tvp != NULL)
2945                 VN_RELE(tvp);
2946 }
2947 
2948 void *
2949 rfs3_rename_getfh(RENAME3args *args)
2950 {
2951 
2952         return (&args->from.dir);
2953 }
2954 
2955 void
2956 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2957     struct svc_req *req, cred_t *cr, bool_t ro)
2958 {
2959         int error;
2960         vnode_t *vp;
2961         vnode_t *dvp;
2962         struct vattr *vap;
2963         struct vattr va;
2964         struct vattr *bvap;
2965         struct vattr bva;
2966         struct vattr *avap;
2967         struct vattr ava;
2968         nfs_fh3 *fh3;
2969         struct exportinfo *to_exi;
2970         bslabel_t *clabel;
2971         struct sockaddr *ca;
2972         char *name = NULL;
2973 
2974         vap = NULL;
2975         bvap = NULL;
2976         avap = NULL;
2977         dvp = NULL;
2978 
2979         vp = nfs3_fhtovp(&args->file, exi);
2980 
2981         DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2982             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2983             LINK3args *, args);
2984 
2985         if (vp == NULL) {
2986                 error = ESTALE;
2987                 goto out;
2988         }
2989 
2990         va.va_mask = AT_ALL;
2991         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2992 
2993         fh3 = &args->link.dir;
2994         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2995         if (to_exi == NULL) {
2996                 resp->status = NFS3ERR_ACCES;
2997                 goto out1;
2998         }
2999         exi_rele(to_exi);
3000 
3001         if (to_exi != exi) {
3002                 resp->status = NFS3ERR_XDEV;
3003                 goto out1;
3004         }
3005 
3006         if (is_system_labeled()) {
3007                 clabel = req->rq_label;
3008 
3009                 ASSERT(clabel != NULL);
3010                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3011                     "got client label from request(1)", struct svc_req *, req);
3012 
3013                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3014                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3015                             exi)) {
3016                                 resp->status = NFS3ERR_ACCES;
3017                                 goto out1;
3018                         }
3019                 }
3020         }
3021 
3022         dvp = nfs3_fhtovp(&args->link.dir, exi);
3023         if (dvp == NULL) {
3024                 error = ESTALE;
3025                 goto out;
3026         }
3027 
3028         bva.va_mask = AT_ALL;
3029         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3030 
3031         if (dvp->v_type != VDIR) {
3032                 resp->status = NFS3ERR_NOTDIR;
3033                 goto out1;
3034         }
3035 
3036         if (args->link.name == nfs3nametoolong) {
3037                 resp->status = NFS3ERR_NAMETOOLONG;
3038                 goto out1;
3039         }
3040 
3041         if (args->link.name == NULL || *(args->link.name) == '\0') {
3042                 resp->status = NFS3ERR_ACCES;
3043                 goto out1;
3044         }
3045 
3046         if (rdonly(ro, dvp)) {
3047                 resp->status = NFS3ERR_ROFS;
3048                 goto out1;
3049         }
3050 
3051         if (is_system_labeled()) {
3052                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3053                     "got client label from request(1)", struct svc_req *, req);
3054 
3055                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3056                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3057                             exi)) {
3058                                 resp->status = NFS3ERR_ACCES;
3059                                 goto out1;
3060                         }
3061                 }
3062         }
3063 
3064         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3065         name = nfscmd_convname(ca, exi, args->link.name,
3066             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3067 
3068         if (name == NULL) {
3069                 resp->status = NFS3ERR_SERVERFAULT;
3070                 goto out1;
3071         }
3072 
3073         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3074 
3075         va.va_mask = AT_ALL;
3076         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3077         ava.va_mask = AT_ALL;
3078         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3079 
3080         /*
3081          * Force modified data and metadata out to stable storage.
3082          */
3083         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3084         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3085 
3086         if (error)
3087                 goto out;
3088 
3089         VN_RELE(dvp);
3090 
3091         resp->status = NFS3_OK;
3092         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3093         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3094 
3095         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3096             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3097             LINK3res *, resp);
3098 
3099         VN_RELE(vp);
3100 
3101         return;
3102 
3103 out:
3104         if (curthread->t_flag & T_WOULDBLOCK) {
3105                 curthread->t_flag &= ~T_WOULDBLOCK;
3106                 resp->status = NFS3ERR_JUKEBOX;
3107         } else
3108                 resp->status = puterrno3(error);
3109 out1:
3110         if (name != NULL && name != args->link.name)
3111                 kmem_free(name, MAXPATHLEN + 1);
3112 
3113         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3114             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3115             LINK3res *, resp);
3116 
3117         if (vp != NULL)
3118                 VN_RELE(vp);
3119         if (dvp != NULL)
3120                 VN_RELE(dvp);
3121         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3122         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3123 }
3124 
3125 void *
3126 rfs3_link_getfh(LINK3args *args)
3127 {
3128 
3129         return (&args->file);
3130 }
3131 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries the directory attributes and exactly one
 * directory entry whose name is `length' bytes long.  If the count in
 * the incoming request is at least this large, we are guaranteed to be
 * able to return at least one directory entry if one exists, so there
 * is no need to check for NFS3ERR_TOOSMALL.  If the requested count is
 * smaller than this, that check has to be made.
 *
 * The fixed part is made up of the following XDR units:
 *
 *      status                  1
 *      attributes flag         1
 *      cookie verifier         2
 *      attributes              NFS3_SIZEOF_FATTR3
 *      entry follows boolean   1
 *      file id                 2
 *      name length             1
 *      cookie                  2
 *      end of list             1
 *      end of file             1
 *
 * each of which is BYTES_PER_XDR_UNIT bytes.  The name itself is added
 * on top, rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (roundup((length), BYTES_PER_XDR_UNIT) + \
            BYTES_PER_XDR_UNIT * \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3160 
3161 /* ARGSUSED */
3162 void
3163 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3164     struct svc_req *req, cred_t *cr, bool_t ro)
3165 {
3166         int error;
3167         vnode_t *vp;
3168         struct vattr *vap;
3169         struct vattr va;
3170         struct iovec iov;
3171         struct uio uio;
3172         char *data;
3173         int iseof;
3174         int bufsize;
3175         int namlen;
3176         uint_t count;
3177         struct sockaddr *ca;
3178 
3179         vap = NULL;
3180 
3181         vp = nfs3_fhtovp(&args->dir, exi);
3182 
3183         DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3184             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3185             READDIR3args *, args);
3186 
3187         if (vp == NULL) {
3188                 error = ESTALE;
3189                 goto out;
3190         }
3191 
3192         if (is_system_labeled()) {
3193                 bslabel_t *clabel = req->rq_label;
3194 
3195                 ASSERT(clabel != NULL);
3196                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3197                     "got client label from request(1)", struct svc_req *, req);
3198 
3199                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3200                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3201                             exi)) {
3202                                 resp->status = NFS3ERR_ACCES;
3203                                 goto out1;
3204                         }
3205                 }
3206         }
3207 
3208         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3209 
3210         va.va_mask = AT_ALL;
3211         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3212 
3213         if (vp->v_type != VDIR) {
3214                 resp->status = NFS3ERR_NOTDIR;
3215                 goto out1;
3216         }
3217 
3218         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3219         if (error)
3220                 goto out;
3221 
3222         /*
3223          * Now don't allow arbitrary count to alloc;
3224          * allow the maximum not to exceed rfs3_tsize()
3225          */
3226         if (args->count > rfs3_tsize(req))
3227                 args->count = rfs3_tsize(req);
3228 
3229         /*
3230          * Make sure that there is room to read at least one entry
3231          * if any are available.
3232          */
3233         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3234                 count = DIRENT64_RECLEN(MAXNAMELEN);
3235         else
3236                 count = args->count;
3237 
3238         data = kmem_alloc(count, KM_SLEEP);
3239 
3240         iov.iov_base = data;
3241         iov.iov_len = count;
3242         uio.uio_iov = &iov;
3243         uio.uio_iovcnt = 1;
3244         uio.uio_segflg = UIO_SYSSPACE;
3245         uio.uio_extflg = UIO_COPY_CACHED;
3246         uio.uio_loffset = (offset_t)args->cookie;
3247         uio.uio_resid = count;
3248 
3249         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3250 
3251         va.va_mask = AT_ALL;
3252         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3253 
3254         if (error) {
3255                 kmem_free(data, count);
3256                 goto out;
3257         }
3258 
3259         /*
3260          * If the count was not large enough to be able to guarantee
3261          * to be able to return at least one entry, then need to
3262          * check to see if NFS3ERR_TOOSMALL should be returned.
3263          */
3264         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3265                 /*
3266                  * bufsize is used to keep track of the size of the response.
3267                  * It is primed with:
3268                  *      1 for the status +
3269                  *      1 for the dir_attributes.attributes boolean +
3270                  *      2 for the cookie verifier
3271                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3272                  * to bytes.  If there are directory attributes to be
3273                  * returned, then:
3274                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3275                  * time BYTES_PER_XDR_UNIT is added to account for them.
3276                  */
3277                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3278                 if (vap != NULL)
3279                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3280                 /*
3281                  * An entry is composed of:
3282                  *      1 for the true/false list indicator +
3283                  *      2 for the fileid +
3284                  *      1 for the length of the name +
3285                  *      2 for the cookie +
3286                  * all times BYTES_PER_XDR_UNIT to convert from
3287                  * XDR units to bytes, plus the length of the name
3288                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3289                  */
3290                 if (count != uio.uio_resid) {
3291                         namlen = strlen(((struct dirent64 *)data)->d_name);
3292                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3293                             roundup(namlen, BYTES_PER_XDR_UNIT);
3294                 }
3295                 /*
3296                  * We need to check to see if the number of bytes left
3297                  * to go into the buffer will actually fit into the
3298                  * buffer.  This is calculated as the size of this
3299                  * entry plus:
3300                  *      1 for the true/false list indicator +
3301                  *      1 for the eof indicator
3302                  * times BYTES_PER_XDR_UNIT to convert from from
3303                  * XDR units to bytes.
3304                  */
3305                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3306                 if (bufsize > args->count) {
3307                         kmem_free(data, count);
3308                         resp->status = NFS3ERR_TOOSMALL;
3309                         goto out1;
3310                 }
3311         }
3312 
3313         /*
3314          * Have a valid readir buffer for the native character
3315          * set. Need to check if a conversion is necessary and
3316          * potentially rewrite the whole buffer. Note that if the
3317          * conversion expands names enough, the structure may not
3318          * fit. In this case, we need to drop entries until if fits
3319          * and patch the counts in order that the next readdir will
3320          * get the correct entries.
3321          */
3322         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3323         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3324 
3325 
3326         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3327 
3328 #if 0 /* notyet */
3329         /*
3330          * Don't do this.  It causes local disk writes when just
3331          * reading the file and the overhead is deemed larger
3332          * than the benefit.
3333          */
3334         /*
3335          * Force modified metadata out to stable storage.
3336          */
3337         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3338 #endif
3339 
3340         resp->status = NFS3_OK;
3341         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3342         resp->resok.cookieverf = 0;
3343         resp->resok.reply.entries = (entry3 *)data;
3344         resp->resok.reply.eof = iseof;
3345         resp->resok.size = count - uio.uio_resid;
3346         resp->resok.count = args->count;
3347         resp->resok.freecount = count;
3348 
3349         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3350             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3351             READDIR3res *, resp);
3352 
3353         VN_RELE(vp);
3354 
3355         return;
3356 
3357 out:
3358         if (curthread->t_flag & T_WOULDBLOCK) {
3359                 curthread->t_flag &= ~T_WOULDBLOCK;
3360                 resp->status = NFS3ERR_JUKEBOX;
3361         } else
3362                 resp->status = puterrno3(error);
3363 out1:
3364         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3365 
3366         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3367             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3368             READDIR3res *, resp);
3369 
3370         if (vp != NULL) {
3371                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3372                 VN_RELE(vp);
3373         }
3374         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3375 }
3376 
3377 void *
3378 rfs3_readdir_getfh(READDIR3args *args)
3379 {
3380 
3381         return (&args->dir);
3382 }
3383 
3384 void
3385 rfs3_readdir_free(READDIR3res *resp)
3386 {
3387 
3388         if (resp->status == NFS3_OK)
3389                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3390 }
3391 
/*
 * nextdp(dp) yields the struct dirent64 that starts d_reclen bytes after
 * the start of dp.  Any earlier definition of nextdp is discarded first;
 * #undef of a name that is not defined as a macro has no effect.
 */
#undef nextdp
#define nextdp(dp)      \
        ((struct dirent64 *)(&((char *)(dp))[(dp)->d_reclen]))
3396 
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) is the size, in bytes, of a single
 * READDIRPLUS directory entry including its attributes and its file
 * handle, for a name that is `namelen' bytes long.  If the space
 * requested by the client is larger than this, we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * The fixed part is made up of the following XDR units:
 *
 *      entry follows boolean   1
 *      file id                 2
 *      name length             1
 *      cookie                  2
 *      attributes flag         1
 *      attributes              NFS3_SIZEOF_FATTR3
 *      file handle status      1
 *      file handle length      1
 *
 * each of which is BYTES_PER_XDR_UNIT bytes.  Added on top are the
 * maximum size of a file handle (NFS3_MAXFHSIZE) and the name itself,
 * rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        (NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT) + \
            BYTES_PER_XDR_UNIT * \
            (1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1))
3420 
/*
 * Copied into the local rd_unit at the start of rfs3_readdirplus();
 * initialized to MAXBSIZE.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3422 
3423 /* ARGSUSED */
3424 void
3425 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3426     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3427 {
3428         int error;
3429         vnode_t *vp;
3430         struct vattr *vap;
3431         struct vattr va;
3432         struct iovec iov;
3433         struct uio uio;
3434         char *data;
3435         int iseof;
3436         struct dirent64 *dp;
3437         vnode_t *nvp;
3438         struct vattr *nvap;
3439         struct vattr nva;
3440         entryplus3_info *infop = NULL;
3441         int size = 0;
3442         int nents = 0;
3443         int bufsize = 0;
3444         int entrysize = 0;
3445         int tofit = 0;
3446         int rd_unit = rfs3_readdir_unit;
3447         int prev_len;
3448         int space_left;
3449         int i;
3450         uint_t *namlen = NULL;
3451         char *ndata = NULL;
3452         struct sockaddr *ca;
3453         size_t ret;
3454 
3455         vap = NULL;
3456 
3457         vp = nfs3_fhtovp(&args->dir, exi);
3458 
3459         DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3460             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3461             READDIRPLUS3args *, args);
3462 
3463         if (vp == NULL) {
3464                 error = ESTALE;
3465                 goto out;
3466         }
3467 
3468         if (is_system_labeled()) {
3469                 bslabel_t *clabel = req->rq_label;
3470 
3471                 ASSERT(clabel != NULL);
3472                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3473                     char *, "got client label from request(1)",
3474                     struct svc_req *, req);
3475 
3476                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3477                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3478                             exi)) {
3479                                 resp->status = NFS3ERR_ACCES;
3480                                 goto out1;
3481                         }
3482                 }
3483         }
3484 
3485         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3486 
3487         va.va_mask = AT_ALL;
3488         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3489 
3490         if (vp->v_type != VDIR) {
3491                 error = ENOTDIR;
3492                 goto out;
3493         }
3494 
3495         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3496         if (error)
3497                 goto out;
3498 
3499         /*
3500          * Don't allow arbitrary counts for allocation
3501          */
3502         if (args->maxcount > rfs3_tsize(req))
3503                 args->maxcount = rfs3_tsize(req);
3504 
3505         /*
3506          * Make sure that there is room to read at least one entry
3507          * if any are available
3508          */
3509         args->dircount = MIN(args->dircount, args->maxcount);
3510 
3511         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3512                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3513 
3514         /*
3515          * This allocation relies on a minimum directory entry
3516          * being roughly 24 bytes.  Therefore, the namlen array
3517          * will have enough space based on the maximum number of
3518          * entries to read.
3519          */
3520         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3521 
3522         space_left = args->dircount;
3523         data = kmem_alloc(args->dircount, KM_SLEEP);
3524         dp = (struct dirent64 *)data;
3525         uio.uio_iov = &iov;
3526         uio.uio_iovcnt = 1;
3527         uio.uio_segflg = UIO_SYSSPACE;
3528         uio.uio_extflg = UIO_COPY_CACHED;
3529         uio.uio_loffset = (offset_t)args->cookie;
3530 
3531         /*
3532          * bufsize is used to keep track of the size of the response as we
3533          * get post op attributes and filehandles for each entry.  This is
3534          * an optimization as the server may have read more entries than will
3535          * fit in the buffer specified by maxcount.  We stop calculating
3536          * post op attributes and filehandles once we have exceeded maxcount.
3537          * This will minimize the effect of truncation.
3538          *
3539          * It is primed with:
3540          *      1 for the status +
3541          *      1 for the dir_attributes.attributes boolean +
3542          *      2 for the cookie verifier
3543          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3544          * to bytes.  If there are directory attributes to be
3545          * returned, then:
3546          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3547          * time BYTES_PER_XDR_UNIT is added to account for them.
3548          */
3549         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3550         if (vap != NULL)
3551                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3552 
3553 getmoredents:
3554         /*
3555          * Here we make a check so that our read unit is not larger than
3556          * the space left in the buffer.
3557          */
3558         rd_unit = MIN(rd_unit, space_left);
3559         iov.iov_base = (char *)dp;
3560         iov.iov_len = rd_unit;
3561         uio.uio_resid = rd_unit;
3562         prev_len = rd_unit;
3563 
3564         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3565 
3566         if (error) {
3567                 kmem_free(data, args->dircount);
3568                 goto out;
3569         }
3570 
3571         if (uio.uio_resid == prev_len && !iseof) {
3572                 if (nents == 0) {
3573                         kmem_free(data, args->dircount);
3574                         resp->status = NFS3ERR_TOOSMALL;
3575                         goto out1;
3576                 }
3577 
3578                 /*
3579                  * We could not get any more entries, so get the attributes
3580                  * and filehandle for the entries already obtained.
3581                  */
3582                 goto good;
3583         }
3584 
3585         /*
3586          * We estimate the size of the response by assuming the
3587          * entry exists and attributes and filehandle are also valid
3588          */
3589         for (size = prev_len - uio.uio_resid;
3590             size > 0;
3591             size -= dp->d_reclen, dp = nextdp(dp)) {
3592 
3593                 if (dp->d_ino == 0) {
3594                         nents++;
3595                         continue;
3596                 }
3597 
3598                 namlen[nents] = strlen(dp->d_name);
3599                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3600 
3601                 /*
3602                  * We need to check to see if the number of bytes left
3603                  * to go into the buffer will actually fit into the
3604                  * buffer.  This is calculated as the size of this
3605                  * entry plus:
3606                  *      1 for the true/false list indicator +
3607                  *      1 for the eof indicator
3608                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3609                  * to bytes.
3610                  *
3611                  * Also check the dircount limit against the first entry read
3612                  *
3613                  */
3614                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3615                 if (bufsize + tofit > args->maxcount) {
3616                         /*
3617                          * We make a check here to see if this was the
3618                          * first entry being measured.  If so, then maxcount
3619                          * was too small to begin with and so we need to
3620                          * return with NFS3ERR_TOOSMALL.
3621                          */
3622                         if (nents == 0) {
3623                                 kmem_free(data, args->dircount);
3624                                 resp->status = NFS3ERR_TOOSMALL;
3625                                 goto out1;
3626                         }
3627                         iseof = FALSE;
3628                         goto good;
3629                 }
3630                 bufsize += entrysize;
3631                 nents++;
3632         }
3633 
3634         /*
3635          * If there is enough room to fit at least 1 more entry including
3636          * post op attributes and filehandle in the buffer AND that we haven't
3637          * exceeded dircount then go back and get some more.
3638          */
3639         if (!iseof &&
3640             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3641                 space_left -= (prev_len - uio.uio_resid);
3642                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3643                         goto getmoredents;
3644 
3645                 /* else, fall through */
3646         }
3647 good:
3648         va.va_mask = AT_ALL;
3649         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3650 
3651         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3652 
3653         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3654         resp->resok.infop = infop;
3655 
3656         dp = (struct dirent64 *)data;
3657         for (i = 0; i < nents; i++) {
3658 
3659                 if (dp->d_ino == 0) {
3660                         infop[i].attr.attributes = FALSE;
3661                         infop[i].fh.handle_follows = FALSE;
3662                         dp = nextdp(dp);
3663                         continue;
3664                 }
3665 
3666                 infop[i].namelen = namlen[i];
3667 
3668                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3669                     NULL, NULL, NULL);
3670                 if (error) {
3671                         infop[i].attr.attributes = FALSE;
3672                         infop[i].fh.handle_follows = FALSE;
3673                         dp = nextdp(dp);
3674                         continue;
3675                 }
3676 
3677                 nva.va_mask = AT_ALL;
3678                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3679 
3680                 /* Lie about the object type for a referral */
3681                 if (vn_is_nfs_reparse(nvp, cr))
3682                         nvap->va_type = VLNK;
3683 
3684                 if (vn_ismntpt(nvp)) {
3685                         infop[i].attr.attributes = FALSE;
3686                         infop[i].fh.handle_follows = FALSE;
3687                 } else {
3688                         vattr_to_post_op_attr(nvap, &infop[i].attr);
3689 
3690                         error = makefh3(&infop[i].fh.handle, nvp, exi);
3691                         if (!error)
3692                                 infop[i].fh.handle_follows = TRUE;
3693                         else
3694                                 infop[i].fh.handle_follows = FALSE;
3695                 }
3696 
3697                 VN_RELE(nvp);
3698                 dp = nextdp(dp);
3699         }
3700 
3701         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3702         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3703         if (ndata == NULL)
3704                 ndata = data;
3705 
3706         if (ret > 0) {
3707                 /*
3708                  * We had to drop one or more entries in order to fit
3709                  * during the character conversion.  We need to patch
3710                  * up the size and eof info.
3711                  */
3712                 if (iseof)
3713                         iseof = FALSE;
3714 
3715                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3716                     nents, ret);
3717         }
3718 
3719 
3720 #if 0 /* notyet */
3721         /*
3722          * Don't do this.  It causes local disk writes when just
3723          * reading the file and the overhead is deemed larger
3724          * than the benefit.
3725          */
3726         /*
3727          * Force modified metadata out to stable storage.
3728          */
3729         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3730 #endif
3731 
3732         kmem_free(namlen, args->dircount);
3733 
3734         resp->status = NFS3_OK;
3735         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3736         resp->resok.cookieverf = 0;
3737         resp->resok.reply.entries = (entryplus3 *)ndata;
3738         resp->resok.reply.eof = iseof;
3739         resp->resok.size = nents;
3740         resp->resok.count = args->dircount - ret;
3741         resp->resok.maxcount = args->maxcount;
3742 
3743         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3744             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3745             READDIRPLUS3res *, resp);
3746 
3747         VN_RELE(vp);
3748 
3749         return;
3750 
3751 out:
3752         if (curthread->t_flag & T_WOULDBLOCK) {
3753                 curthread->t_flag &= ~T_WOULDBLOCK;
3754                 resp->status = NFS3ERR_JUKEBOX;
3755         } else {
3756                 resp->status = puterrno3(error);
3757         }
3758 out1:
3759         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3760 
3761         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3762             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3763             READDIRPLUS3res *, resp);
3764 
3765         if (vp != NULL) {
3766                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3767                 VN_RELE(vp);
3768         }
3769 
3770         if (namlen != NULL)
3771                 kmem_free(namlen, args->dircount);
3772 
3773         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3774 }
3775 
3776 void *
3777 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3778 {
3779 
3780         return (&args->dir);
3781 }
3782 
3783 void
3784 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3785 {
3786 
3787         if (resp->status == NFS3_OK) {
3788                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3789                 kmem_free(resp->resok.infop,
3790                     resp->resok.size * sizeof (struct entryplus3_info));
3791         }
3792 }
3793 
3794 /* ARGSUSED */
3795 void
3796 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3797     struct svc_req *req, cred_t *cr, bool_t ro)
3798 {
3799         int error;
3800         vnode_t *vp;
3801         struct vattr *vap;
3802         struct vattr va;
3803         struct statvfs64 sb;
3804 
3805         vap = NULL;
3806 
3807         vp = nfs3_fhtovp(&args->fsroot, exi);
3808 
3809         DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3810             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3811             FSSTAT3args *, args);
3812 
3813         if (vp == NULL) {
3814                 error = ESTALE;
3815                 goto out;
3816         }
3817 
3818         if (is_system_labeled()) {
3819                 bslabel_t *clabel = req->rq_label;
3820 
3821                 ASSERT(clabel != NULL);
3822                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3823                     "got client label from request(1)", struct svc_req *, req);
3824 
3825                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3826                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3827                             exi)) {
3828                                 resp->status = NFS3ERR_ACCES;
3829                                 goto out1;
3830                         }
3831                 }
3832         }
3833 
3834         error = VFS_STATVFS(vp->v_vfsp, &sb);
3835 
3836         va.va_mask = AT_ALL;
3837         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3838 
3839         if (error)
3840                 goto out;
3841 
3842         resp->status = NFS3_OK;
3843         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3844         if (sb.f_blocks != (fsblkcnt64_t)-1)
3845                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3846         else
3847                 resp->resok.tbytes = (size3)sb.f_blocks;
3848         if (sb.f_bfree != (fsblkcnt64_t)-1)
3849                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3850         else
3851                 resp->resok.fbytes = (size3)sb.f_bfree;
3852         if (sb.f_bavail != (fsblkcnt64_t)-1)
3853                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3854         else
3855                 resp->resok.abytes = (size3)sb.f_bavail;
3856         resp->resok.tfiles = (size3)sb.f_files;
3857         resp->resok.ffiles = (size3)sb.f_ffree;
3858         resp->resok.afiles = (size3)sb.f_favail;
3859         resp->resok.invarsec = 0;
3860 
3861         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3862             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3863             FSSTAT3res *, resp);
3864         VN_RELE(vp);
3865 
3866         return;
3867 
3868 out:
3869         if (curthread->t_flag & T_WOULDBLOCK) {
3870                 curthread->t_flag &= ~T_WOULDBLOCK;
3871                 resp->status = NFS3ERR_JUKEBOX;
3872         } else
3873                 resp->status = puterrno3(error);
3874 out1:
3875         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3876             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3877             FSSTAT3res *, resp);
3878 
3879         if (vp != NULL)
3880                 VN_RELE(vp);
3881         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3882 }
3883 
3884 void *
3885 rfs3_fsstat_getfh(FSSTAT3args *args)
3886 {
3887 
3888         return (&args->fsroot);
3889 }
3890 
3891 /* ARGSUSED */
3892 void
3893 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3894     struct svc_req *req, cred_t *cr, bool_t ro)
3895 {
3896         vnode_t *vp;
3897         struct vattr *vap;
3898         struct vattr va;
3899         uint32_t xfer_size;
3900         ulong_t l = 0;
3901         int error;
3902 
3903         vp = nfs3_fhtovp(&args->fsroot, exi);
3904 
3905         DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3906             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3907             FSINFO3args *, args);
3908 
3909         if (vp == NULL) {
3910                 if (curthread->t_flag & T_WOULDBLOCK) {
3911                         curthread->t_flag &= ~T_WOULDBLOCK;
3912                         resp->status = NFS3ERR_JUKEBOX;
3913                 } else
3914                         resp->status = NFS3ERR_STALE;
3915                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3916                 goto out;
3917         }
3918 
3919         if (is_system_labeled()) {
3920                 bslabel_t *clabel = req->rq_label;
3921 
3922                 ASSERT(clabel != NULL);
3923                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3924                     "got client label from request(1)", struct svc_req *, req);
3925 
3926                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3927                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3928                             exi)) {
3929                                 resp->status = NFS3ERR_STALE;
3930                                 vattr_to_post_op_attr(NULL,
3931                                     &resp->resfail.obj_attributes);
3932                                 goto out;
3933                         }
3934                 }
3935         }
3936 
3937         va.va_mask = AT_ALL;
3938         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3939 
3940         resp->status = NFS3_OK;
3941         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3942         xfer_size = rfs3_tsize(req);
3943         resp->resok.rtmax = xfer_size;
3944         resp->resok.rtpref = xfer_size;
3945         resp->resok.rtmult = DEV_BSIZE;
3946         resp->resok.wtmax = xfer_size;
3947         resp->resok.wtpref = xfer_size;
3948         resp->resok.wtmult = DEV_BSIZE;
3949         resp->resok.dtpref = MAXBSIZE;
3950 
3951         /*
3952          * Large file spec: want maxfilesize based on limit of
3953          * underlying filesystem.  We can guess 2^31-1 if need be.
3954          */
3955         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3956         if (error) {
3957                 resp->status = puterrno3(error);
3958                 goto out;
3959         }
3960 
3961         /*
3962          * If the underlying file system does not support _PC_FILESIZEBITS,
3963          * return a reasonable default. Note that error code on VOP_PATHCONF
3964          * will be 0, even if the underlying file system does not support
3965          * _PC_FILESIZEBITS.
3966          */
3967         if (l == (ulong_t)-1) {
3968                 resp->resok.maxfilesize = MAXOFF32_T;
3969         } else {
3970                 if (l >= (sizeof (uint64_t) * 8))
3971                         resp->resok.maxfilesize = INT64_MAX;
3972                 else
3973                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3974         }
3975 
3976         resp->resok.time_delta.seconds = 0;
3977         resp->resok.time_delta.nseconds = 1000;
3978         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3979             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3980 
3981         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3982             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3983             FSINFO3res *, resp);
3984 
3985         VN_RELE(vp);
3986 
3987         return;
3988 
3989 out:
3990         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3991             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
3992             FSINFO3res *, resp);
3993         if (vp != NULL)
3994                 VN_RELE(vp);
3995 }
3996 
3997 void *
3998 rfs3_fsinfo_getfh(FSINFO3args *args)
3999 {
4000         return (&args->fsroot);
4001 }
4002 
4003 /* ARGSUSED */
4004 void
4005 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4006     struct svc_req *req, cred_t *cr, bool_t ro)
4007 {
4008         int error;
4009         vnode_t *vp;
4010         struct vattr *vap;
4011         struct vattr va;
4012         ulong_t val;
4013 
4014         vap = NULL;
4015 
4016         vp = nfs3_fhtovp(&args->object, exi);
4017 
4018         DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4019             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4020             PATHCONF3args *, args);
4021 
4022         if (vp == NULL) {
4023                 error = ESTALE;
4024                 goto out;
4025         }
4026 
4027         if (is_system_labeled()) {
4028                 bslabel_t *clabel = req->rq_label;
4029 
4030                 ASSERT(clabel != NULL);
4031                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4032                     "got client label from request(1)", struct svc_req *, req);
4033 
4034                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4035                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4036                             exi)) {
4037                                 resp->status = NFS3ERR_ACCES;
4038                                 goto out1;
4039                         }
4040                 }
4041         }
4042 
4043         va.va_mask = AT_ALL;
4044         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4045 
4046         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4047         if (error)
4048                 goto out;
4049         resp->resok.info.link_max = (uint32)val;
4050 
4051         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4052         if (error)
4053                 goto out;
4054         resp->resok.info.name_max = (uint32)val;
4055 
4056         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4057         if (error)
4058                 goto out;
4059         if (val == 1)
4060                 resp->resok.info.no_trunc = TRUE;
4061         else
4062                 resp->resok.info.no_trunc = FALSE;
4063 
4064         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4065         if (error)
4066                 goto out;
4067         if (val == 1)
4068                 resp->resok.info.chown_restricted = TRUE;
4069         else
4070                 resp->resok.info.chown_restricted = FALSE;
4071 
4072         resp->status = NFS3_OK;
4073         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4074         resp->resok.info.case_insensitive = FALSE;
4075         resp->resok.info.case_preserving = TRUE;
4076         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4077             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4078             PATHCONF3res *, resp);
4079         VN_RELE(vp);
4080         return;
4081 
4082 out:
4083         if (curthread->t_flag & T_WOULDBLOCK) {
4084                 curthread->t_flag &= ~T_WOULDBLOCK;
4085                 resp->status = NFS3ERR_JUKEBOX;
4086         } else
4087                 resp->status = puterrno3(error);
4088 out1:
4089         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4090             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4091             PATHCONF3res *, resp);
4092         if (vp != NULL)
4093                 VN_RELE(vp);
4094         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4095 }
4096 
4097 void *
4098 rfs3_pathconf_getfh(PATHCONF3args *args)
4099 {
4100 
4101         return (&args->object);
4102 }
4103 
4104 void
4105 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4106     struct svc_req *req, cred_t *cr, bool_t ro)
4107 {
4108         nfs3_srv_t *ns;
4109         int error;
4110         vnode_t *vp;
4111         struct vattr *bvap;
4112         struct vattr bva;
4113         struct vattr *avap;
4114         struct vattr ava;
4115 
4116         bvap = NULL;
4117         avap = NULL;
4118 
4119         vp = nfs3_fhtovp(&args->file, exi);
4120 
4121         DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4122             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4123             COMMIT3args *, args);
4124 
4125         if (vp == NULL) {
4126                 error = ESTALE;
4127                 goto out;
4128         }
4129 
4130         ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
4131         ns = zone_getspecific(rfs3_zone_key, curzone);
4132         bva.va_mask = AT_ALL;
4133         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4134 
4135         /*
4136          * If we can't get the attributes, then we can't do the
4137          * right access checking.  So, we'll fail the request.
4138          */
4139         if (error)
4140                 goto out;
4141 
4142         bvap = &bva;
4143 
4144         if (rdonly(ro, vp)) {
4145                 resp->status = NFS3ERR_ROFS;
4146                 goto out1;
4147         }
4148 
4149         if (vp->v_type != VREG) {
4150                 resp->status = NFS3ERR_INVAL;
4151                 goto out1;
4152         }
4153 
4154         if (is_system_labeled()) {
4155                 bslabel_t *clabel = req->rq_label;
4156 
4157                 ASSERT(clabel != NULL);
4158                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4159                     "got client label from request(1)", struct svc_req *, req);
4160 
4161                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4162                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4163                             exi)) {
4164                                 resp->status = NFS3ERR_ACCES;
4165                                 goto out1;
4166                         }
4167                 }
4168         }
4169 
4170         if (crgetuid(cr) != bva.va_uid &&
4171             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4172                 goto out;
4173 
4174         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4175 
4176         ava.va_mask = AT_ALL;
4177         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4178 
4179         if (error)
4180                 goto out;
4181 
4182         resp->status = NFS3_OK;
4183         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4184         resp->resok.verf = ns->write3verf;
4185 
4186         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4187             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4188             COMMIT3res *, resp);
4189 
4190         VN_RELE(vp);
4191 
4192         return;
4193 
4194 out:
4195         if (curthread->t_flag & T_WOULDBLOCK) {
4196                 curthread->t_flag &= ~T_WOULDBLOCK;
4197                 resp->status = NFS3ERR_JUKEBOX;
4198         } else
4199                 resp->status = puterrno3(error);
4200 out1:
4201         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4202             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4203             COMMIT3res *, resp);
4204 
4205         if (vp != NULL)
4206                 VN_RELE(vp);
4207         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4208 }
4209 
4210 void *
4211 rfs3_commit_getfh(COMMIT3args *args)
4212 {
4213 
4214         return (&args->file);
4215 }
4216 
4217 static int
4218 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4219 {
4220 
4221         vap->va_mask = 0;
4222 
4223         if (sap->mode.set_it) {
4224                 vap->va_mode = (mode_t)sap->mode.mode;
4225                 vap->va_mask |= AT_MODE;
4226         }
4227         if (sap->uid.set_it) {
4228                 vap->va_uid = (uid_t)sap->uid.uid;
4229                 vap->va_mask |= AT_UID;
4230         }
4231         if (sap->gid.set_it) {
4232                 vap->va_gid = (gid_t)sap->gid.gid;
4233                 vap->va_mask |= AT_GID;
4234         }
4235         if (sap->size.set_it) {
4236                 if (sap->size.size > (size3)((u_longlong_t)-1))
4237                         return (EINVAL);
4238                 vap->va_size = sap->size.size;
4239                 vap->va_mask |= AT_SIZE;
4240         }
4241         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4242 #ifndef _LP64
4243                 /* check time validity */
4244                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4245                         return (EOVERFLOW);
4246 #endif
4247                 /*
4248                  * nfs protocol defines times as unsigned so don't extend sign,
4249                  * unless sysadmin set nfs_allow_preepoch_time.
4250                  */
4251                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4252                     sap->atime.atime.seconds);
4253                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4254                 vap->va_mask |= AT_ATIME;
4255         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4256                 gethrestime(&vap->va_atime);
4257                 vap->va_mask |= AT_ATIME;
4258         }
4259         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4260 #ifndef _LP64
4261                 /* check time validity */
4262                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4263                         return (EOVERFLOW);
4264 #endif
4265                 /*
4266                  * nfs protocol defines times as unsigned so don't extend sign,
4267                  * unless sysadmin set nfs_allow_preepoch_time.
4268                  */
4269                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4270                     sap->mtime.mtime.seconds);
4271                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4272                 vap->va_mask |= AT_MTIME;
4273         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4274                 gethrestime(&vap->va_mtime);
4275                 vap->va_mask |= AT_MTIME;
4276         }
4277 
4278         return (0);
4279 }
4280 
/*
 * Lookup table from a vnode type to the NFSv3 ftype3 sent on the wire;
 * vattr_to_fattr3() indexes it directly with va_type.  Entries of 0
 * presumably mark vnode types with no NFSv3 equivalent - TODO confirm.
 * NOTE(review): the table has 11 entries while vattr_to_fattr3() asserts
 * va_type <= VBAD; confirm that VBAD's ordinal lies inside the table.
 */
static const ftype3 vt_to_nf3[] = {
        0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
};
4284 
4285 static int
4286 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4287 {
4288 
4289         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4290         /* Return error if time or size overflow */
4291         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4292                 return (EOVERFLOW);
4293         }
4294         fap->type = vt_to_nf3[vap->va_type];
4295         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4296         fap->nlink = (uint32)vap->va_nlink;
4297         if (vap->va_uid == UID_NOBODY)
4298                 fap->uid = (uid3)NFS_UID_NOBODY;
4299         else
4300                 fap->uid = (uid3)vap->va_uid;
4301         if (vap->va_gid == GID_NOBODY)
4302                 fap->gid = (gid3)NFS_GID_NOBODY;
4303         else
4304                 fap->gid = (gid3)vap->va_gid;
4305         fap->size = (size3)vap->va_size;
4306         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4307         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4308         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4309         fap->fsid = (uint64)vap->va_fsid;
4310         fap->fileid = (fileid3)vap->va_nodeid;
4311         fap->atime.seconds = vap->va_atime.tv_sec;
4312         fap->atime.nseconds = vap->va_atime.tv_nsec;
4313         fap->mtime.seconds = vap->va_mtime.tv_sec;
4314         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4315         fap->ctime.seconds = vap->va_ctime.tv_sec;
4316         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4317         return (0);
4318 }
4319 
4320 static int
4321 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4322 {
4323 
4324         /* Return error if time or size overflow */
4325         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4326             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4327             NFS3_SIZE_OK(vap->va_size))) {
4328                 return (EOVERFLOW);
4329         }
4330         wccap->size = (size3)vap->va_size;
4331         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4332         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4333         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4334         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4335         return (0);
4336 }
4337 
4338 static void
4339 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4340 {
4341 
4342         /* don't return attrs if time overflow */
4343         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4344                 poap->attributes = TRUE;
4345         } else
4346                 poap->attributes = FALSE;
4347 }
4348 
4349 void
4350 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4351 {
4352 
4353         /* don't return attrs if time overflow */
4354         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4355                 poap->attributes = TRUE;
4356         } else
4357                 poap->attributes = FALSE;
4358 }
4359 
4360 static void
4361 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4362 {
4363         vattr_to_pre_op_attr(bvap, &wccp->before);
4364         vattr_to_post_op_attr(avap, &wccp->after);
4365 }
4366 
4367 static int
4368 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4369 {
4370         struct clist    *wcl;
4371         int             wlist_len;
4372         count3          count = rok->count;
4373 
4374         wcl = args->wlist;
4375         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4376                 return (FALSE);
4377 
4378         wcl = args->wlist;
4379         rok->wlist_len = wlist_len;
4380         rok->wlist = wcl;
4381         return (TRUE);
4382 }
4383 
4384 /* ARGSUSED */
4385 static void *
4386 rfs3_zone_init(zoneid_t zoneid)
4387 {
4388         nfs3_srv_t *ns;
4389         struct rfs3_verf_overlay {
4390                 uint_t id; /* a "unique" identifier */
4391                 int ts; /* a unique timestamp */
4392         } *verfp;
4393         timestruc_t now;
4394 
4395         ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4396 
4397         /*
4398          * The following algorithm attempts to find a unique verifier
4399          * to be used as the write verifier returned from the server
4400          * to the client.  It is important that this verifier change
4401          * whenever the server reboots.  Of secondary importance, it
4402          * is important for the verifier to be unique between two
4403          * different servers.
4404          *
4405          * Thus, an attempt is made to use the system hostid and the
4406          * current time in seconds when the nfssrv kernel module is
4407          * loaded.  It is assumed that an NFS server will not be able
4408          * to boot and then to reboot in less than a second.  If the
4409          * hostid has not been set, then the current high resolution
4410          * time is used.  This will ensure different verifiers each
4411          * time the server reboots and minimize the chances that two
4412          * different servers will have the same verifier.
4413          */
4414 
4415 #ifndef lint
4416         /*
4417          * We ASSERT that this constant logic expression is
4418          * always true because in the past, it wasn't.
4419          */
4420         ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4421 #endif
4422 
4423         gethrestime(&now);
4424         verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4425         verfp->ts = (int)now.tv_sec;
4426         verfp->id = zone_get_hostid(NULL);
4427 
4428         if (verfp->id == 0)
4429                 verfp->id = (uint_t)now.tv_nsec;
4430 
4431         return (ns);
4432 }
4433 
4434 /* ARGSUSED */
4435 static void
4436 rfs3_zone_fini(zoneid_t zoneid, void *data)
4437 {
4438         nfs3_srv_t *ns = data;
4439 
4440         kmem_free(ns, sizeof (*ns));
4441 }
4442 
4443 void
4444 rfs3_srvrinit(void)
4445 {
4446         nfs3_srv_caller_id = fs_new_caller_id();
4447         zone_key_create(&rfs3_zone_key, rfs3_zone_init, NULL, rfs3_zone_fini);
4448 }
4449 
/*
 * Counterpart of rfs3_srvrinit().  Intentionally empty: this function
 * performs no teardown.
 */
void
rfs3_srvrfini(void)
{
}