1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2018 Nexenta Systems, Inc.
  24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 
  28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /* All Rights Reserved */
  30 
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/buf.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/uio.h>
  40 #include <sys/errno.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/statvfs.h>
  43 #include <sys/kmem.h>
  44 #include <sys/dirent.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/systeminfo.h>
  48 #include <sys/flock.h>
  49 #include <sys/nbmlock.h>
  50 #include <sys/policy.h>
  51 #include <sys/sdt.h>
  52 
  53 #include <rpc/types.h>
  54 #include <rpc/auth.h>
  55 #include <rpc/svc.h>
  56 #include <rpc/rpc_rdma.h>
  57 
  58 #include <nfs/nfs.h>
  59 #include <nfs/export.h>
  60 #include <nfs/nfs_cmd.h>
  61 
  62 #include <sys/strsubr.h>
  63 #include <sys/tsol/label.h>
  64 #include <sys/tsol/tndb.h>
  65 
  66 #include <sys/zone.h>
  67 
  68 #include <inet/ip.h>
  69 #include <inet/ip6.h>
  70 
  71 /*
  72  * Zone global variables of NFSv3 server
  73  */
  74 typedef struct nfs3_srv {
  75         writeverf3      write3verf;
  76 } nfs3_srv_t;
  77 
  78 /*
  79  * These are the interface routines for the server side of the
  80  * Network File System.  See the NFS version 3 protocol specification
  81  * for a description of this interface.
  82  */
  83 
  84 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  85 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  86 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  87 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  88 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  89 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  90 
  91 extern int nfs_loaned_buffers;
  92 
  93 u_longlong_t nfs3_srv_caller_id;
  94 static zone_key_t rfs3_zone_key;
  95 
  96 /* ARGSUSED */
  97 void
  98 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  99     struct svc_req *req, cred_t *cr, bool_t ro)
 100 {
 101         int error;
 102         vnode_t *vp;
 103         struct vattr va;
 104 
 105         vp = nfs3_fhtovp(&args->object, exi);
 106 
 107         DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
 108             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 109             GETATTR3args *, args);
 110 
 111         if (vp == NULL) {
 112                 error = ESTALE;
 113                 goto out;
 114         }
 115 
 116         va.va_mask = AT_ALL;
 117         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 118 
 119         if (!error) {
 120                 /* Lie about the object type for a referral */
 121                 if (vn_is_nfs_reparse(vp, cr))
 122                         va.va_type = VLNK;
 123 
 124                 /* overflow error if time or size is out of range */
 125                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 126                 if (error)
 127                         goto out;
 128                 resp->status = NFS3_OK;
 129 
 130                 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 131                     cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 132                     GETATTR3res *, resp);
 133 
 134                 VN_RELE(vp);
 135 
 136                 return;
 137         }
 138 
 139 out:
 140         if (curthread->t_flag & T_WOULDBLOCK) {
 141                 curthread->t_flag &= ~T_WOULDBLOCK;
 142                 resp->status = NFS3ERR_JUKEBOX;
 143         } else
 144                 resp->status = puterrno3(error);
 145 
 146         DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 147             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 148             GETATTR3res *, resp);
 149 
 150         if (vp != NULL)
 151                 VN_RELE(vp);
 152 }
 153 
 154 void *
 155 rfs3_getattr_getfh(GETATTR3args *args)
 156 {
 157 
 158         return (&args->object);
 159 }
 160 
 161 void
 162 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 163     struct svc_req *req, cred_t *cr, bool_t ro)
 164 {
 165         int error;
 166         vnode_t *vp;
 167         struct vattr *bvap;
 168         struct vattr bva;
 169         struct vattr *avap;
 170         struct vattr ava;
 171         int flag;
 172         int in_crit = 0;
 173         struct flock64 bf;
 174         caller_context_t ct;
 175 
 176         bvap = NULL;
 177         avap = NULL;
 178 
 179         vp = nfs3_fhtovp(&args->object, exi);
 180 
 181         DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
 182             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 183             SETATTR3args *, args);
 184 
 185         if (vp == NULL) {
 186                 error = ESTALE;
 187                 goto out;
 188         }
 189 
 190         error = sattr3_to_vattr(&args->new_attributes, &ava);
 191         if (error)
 192                 goto out;
 193 
 194         if (is_system_labeled()) {
 195                 bslabel_t *clabel = req->rq_label;
 196 
 197                 ASSERT(clabel != NULL);
 198                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 199                     "got client label from request(1)", struct svc_req *, req);
 200 
 201                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 202                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 203                             exi)) {
 204                                 resp->status = NFS3ERR_ACCES;
 205                                 goto out1;
 206                         }
 207                 }
 208         }
 209 
 210         /*
 211          * We need to specially handle size changes because of
 212          * possible conflicting NBMAND locks. Get into critical
 213          * region before VOP_GETATTR, so the size attribute is
 214          * valid when checking conflicts.
 215          *
 216          * Also, check to see if the v4 side of the server has
 217          * delegated this file.  If so, then we return JUKEBOX to
 218          * allow the client to retrasmit its request.
 219          */
 220         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 221                 if (nbl_need_check(vp)) {
 222                         nbl_start_crit(vp, RW_READER);
 223                         in_crit = 1;
 224                 }
 225         }
 226 
 227         bva.va_mask = AT_ALL;
 228         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 229 
 230         /*
 231          * If we can't get the attributes, then we can't do the
 232          * right access checking.  So, we'll fail the request.
 233          */
 234         if (error)
 235                 goto out;
 236 
 237         bvap = &bva;
 238 
 239         if (rdonly(ro, vp)) {
 240                 resp->status = NFS3ERR_ROFS;
 241                 goto out1;
 242         }
 243 
 244         if (args->guard.check &&
 245             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 246             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 247                 resp->status = NFS3ERR_NOT_SYNC;
 248                 goto out1;
 249         }
 250 
 251         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 252                 flag = ATTR_UTIME;
 253         else
 254                 flag = 0;
 255 
 256         /*
 257          * If the filesystem is exported with nosuid, then mask off
 258          * the setuid and setgid bits.
 259          */
 260         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 261             (exi->exi_export.ex_flags & EX_NOSUID))
 262                 ava.va_mode &= ~(VSUID | VSGID);
 263 
 264         ct.cc_sysid = 0;
 265         ct.cc_pid = 0;
 266         ct.cc_caller_id = nfs3_srv_caller_id;
 267         ct.cc_flags = CC_DONTBLOCK;
 268 
 269         /*
 270          * We need to specially handle size changes because it is
 271          * possible for the client to create a file with modes
 272          * which indicate read-only, but with the file opened for
 273          * writing.  If the client then tries to set the size of
 274          * the file, then the normal access checking done in
 275          * VOP_SETATTR would prevent the client from doing so,
 276          * although it should be legal for it to do so.  To get
 277          * around this, we do the access checking for ourselves
 278          * and then use VOP_SPACE which doesn't do the access
 279          * checking which VOP_SETATTR does. VOP_SPACE can only
 280          * operate on VREG files, let VOP_SETATTR handle the other
 281          * extremely rare cases.
 282          * Also the client should not be allowed to change the
 283          * size of the file if there is a conflicting non-blocking
 284          * mandatory lock in the region the change.
 285          */
 286         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 287                 if (in_crit) {
 288                         u_offset_t offset;
 289                         ssize_t length;
 290 
 291                         if (ava.va_size < bva.va_size) {
 292                                 offset = ava.va_size;
 293                                 length = bva.va_size - ava.va_size;
 294                         } else {
 295                                 offset = bva.va_size;
 296                                 length = ava.va_size - bva.va_size;
 297                         }
 298                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 299                             NULL)) {
 300                                 error = EACCES;
 301                                 goto out;
 302                         }
 303                 }
 304 
 305                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 306                         ava.va_mask &= ~AT_SIZE;
 307                         bf.l_type = F_WRLCK;
 308                         bf.l_whence = 0;
 309                         bf.l_start = (off64_t)ava.va_size;
 310                         bf.l_len = 0;
 311                         bf.l_sysid = 0;
 312                         bf.l_pid = 0;
 313                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 314                             (offset_t)ava.va_size, cr, &ct);
 315                 }
 316         }
 317 
 318         if (!error && ava.va_mask)
 319                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 320 
 321         /* check if a monitor detected a delegation conflict */
 322         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 323                 resp->status = NFS3ERR_JUKEBOX;
 324                 goto out1;
 325         }
 326 
 327         ava.va_mask = AT_ALL;
 328         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 329 
 330         /*
 331          * Force modified metadata out to stable storage.
 332          */
 333         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 334 
 335         if (error)
 336                 goto out;
 337 
 338         if (in_crit)
 339                 nbl_end_crit(vp);
 340 
 341         resp->status = NFS3_OK;
 342         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 343 
 344         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 345             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 346             SETATTR3res *, resp);
 347 
 348         VN_RELE(vp);
 349 
 350         return;
 351 
 352 out:
 353         if (curthread->t_flag & T_WOULDBLOCK) {
 354                 curthread->t_flag &= ~T_WOULDBLOCK;
 355                 resp->status = NFS3ERR_JUKEBOX;
 356         } else
 357                 resp->status = puterrno3(error);
 358 out1:
 359         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 360             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 361             SETATTR3res *, resp);
 362 
 363         if (vp != NULL) {
 364                 if (in_crit)
 365                         nbl_end_crit(vp);
 366                 VN_RELE(vp);
 367         }
 368         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 369 }
 370 
 371 void *
 372 rfs3_setattr_getfh(SETATTR3args *args)
 373 {
 374 
 375         return (&args->object);
 376 }
 377 
 378 /* ARGSUSED */
 379 void
 380 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 381     struct svc_req *req, cred_t *cr, bool_t ro)
 382 {
 383         int error;
 384         vnode_t *vp;
 385         vnode_t *dvp;
 386         struct vattr *vap;
 387         struct vattr va;
 388         struct vattr *dvap;
 389         struct vattr dva;
 390         nfs_fh3 *fhp;
 391         struct sec_ol sec = {0, 0};
 392         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 393         struct sockaddr *ca;
 394         char *name = NULL;
 395 
 396         dvap = NULL;
 397 
 398         if (exi != NULL)
 399                 exi_hold(exi);
 400 
 401         /*
 402          * Allow lookups from the root - the default
 403          * location of the public filehandle.
 404          */
 405         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 406                 dvp = ZONE_ROOTVP();
 407                 VN_HOLD(dvp);
 408 
 409                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 410                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 411                     LOOKUP3args *, args);
 412         } else {
 413                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 414 
 415                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 416                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 417                     LOOKUP3args *, args);
 418 
 419                 if (dvp == NULL) {
 420                         error = ESTALE;
 421                         goto out;
 422                 }
 423         }
 424 
 425         dva.va_mask = AT_ALL;
 426         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 427 
 428         if (args->what.name == nfs3nametoolong) {
 429                 resp->status = NFS3ERR_NAMETOOLONG;
 430                 goto out1;
 431         }
 432 
 433         if (args->what.name == NULL || *(args->what.name) == '\0') {
 434                 resp->status = NFS3ERR_ACCES;
 435                 goto out1;
 436         }
 437 
 438         fhp = &args->what.dir;
 439         if (strcmp(args->what.name, "..") == 0 &&
 440             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 441                 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 442                     (dvp->v_flag & VROOT)) {
 443                         /*
 444                          * special case for ".." and 'nohide'exported root
 445                          */
 446                         if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 447                                 resp->status = NFS3ERR_ACCES;
 448                                 goto out1;
 449                         }
 450                 } else {
 451                         resp->status = NFS3ERR_NOENT;
 452                         goto out1;
 453                 }
 454         }
 455 
 456         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 457         name = nfscmd_convname(ca, exi, args->what.name,
 458             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 459 
 460         if (name == NULL) {
 461                 resp->status = NFS3ERR_ACCES;
 462                 goto out1;
 463         }
 464 
 465         /*
 466          * If the public filehandle is used then allow
 467          * a multi-component lookup
 468          */
 469         if (PUBLIC_FH3(&args->what.dir)) {
 470                 publicfh_flag = TRUE;
 471 
 472                 exi_rele(exi);
 473 
 474                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 475                     &exi, &sec);
 476 
 477                 /*
 478                  * Since WebNFS may bypass MOUNT, we need to ensure this
 479                  * request didn't come from an unlabeled admin_low client.
 480                  */
 481                 if (is_system_labeled() && error == 0) {
 482                         int             addr_type;
 483                         void            *ipaddr;
 484                         tsol_tpc_t      *tp;
 485 
 486                         if (ca->sa_family == AF_INET) {
 487                                 addr_type = IPV4_VERSION;
 488                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 489                         } else if (ca->sa_family == AF_INET6) {
 490                                 addr_type = IPV6_VERSION;
 491                                 ipaddr = &((struct sockaddr_in6 *)
 492                                     ca)->sin6_addr;
 493                         }
 494                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 495                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 496                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 497                             SUN_CIPSO) {
 498                                 VN_RELE(vp);
 499                                 error = EACCES;
 500                         }
 501                         if (tp != NULL)
 502                                 TPC_RELE(tp);
 503                 }
 504         } else {
 505                 error = VOP_LOOKUP(dvp, name, &vp,
 506                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 507         }
 508 
 509         if (name != args->what.name)
 510                 kmem_free(name, MAXPATHLEN + 1);
 511 
 512         if (error == 0 && vn_ismntpt(vp)) {
 513                 error = rfs_cross_mnt(&vp, &exi);
 514                 if (error)
 515                         VN_RELE(vp);
 516         }
 517 
 518         if (is_system_labeled() && error == 0) {
 519                 bslabel_t *clabel = req->rq_label;
 520 
 521                 ASSERT(clabel != NULL);
 522                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 523                     "got client label from request(1)", struct svc_req *, req);
 524 
 525                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 526                         if (!do_rfs_label_check(clabel, dvp,
 527                             DOMINANCE_CHECK, exi)) {
 528                                 VN_RELE(vp);
 529                                 error = EACCES;
 530                         }
 531                 }
 532         }
 533 
 534         dva.va_mask = AT_ALL;
 535         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 536 
 537         if (error)
 538                 goto out;
 539 
 540         if (sec.sec_flags & SEC_QUERY) {
 541                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 542         } else {
 543                 error = makefh3(&resp->resok.object, vp, exi);
 544                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 545                         auth_weak = TRUE;
 546         }
 547 
 548         if (error) {
 549                 VN_RELE(vp);
 550                 goto out;
 551         }
 552 
 553         va.va_mask = AT_ALL;
 554         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 555 
 556         exi_rele(exi);
 557         VN_RELE(vp);
 558 
 559         resp->status = NFS3_OK;
 560         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 561         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 562 
 563         /*
 564          * If it's public fh, no 0x81, and client's flavor is
 565          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 566          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 567          */
 568         if (auth_weak)
 569                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 570 
 571         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 572             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 573             LOOKUP3res *, resp);
 574         VN_RELE(dvp);
 575 
 576         return;
 577 
 578 out:
 579         if (curthread->t_flag & T_WOULDBLOCK) {
 580                 curthread->t_flag &= ~T_WOULDBLOCK;
 581                 resp->status = NFS3ERR_JUKEBOX;
 582         } else
 583                 resp->status = puterrno3(error);
 584 out1:
 585         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 586             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 587             LOOKUP3res *, resp);
 588 
 589         if (exi != NULL)
 590                 exi_rele(exi);
 591 
 592         if (dvp != NULL)
 593                 VN_RELE(dvp);
 594         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 595 
 596 }
 597 
 598 void *
 599 rfs3_lookup_getfh(LOOKUP3args *args)
 600 {
 601 
 602         return (&args->what.dir);
 603 }
 604 
 605 /* ARGSUSED */
 606 void
 607 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 608     struct svc_req *req, cred_t *cr, bool_t ro)
 609 {
 610         int error;
 611         vnode_t *vp;
 612         struct vattr *vap;
 613         struct vattr va;
 614         int checkwriteperm;
 615         boolean_t dominant_label = B_FALSE;
 616         boolean_t equal_label = B_FALSE;
 617         boolean_t admin_low_client;
 618 
 619         vap = NULL;
 620 
 621         vp = nfs3_fhtovp(&args->object, exi);
 622 
 623         DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
 624             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 625             ACCESS3args *, args);
 626 
 627         if (vp == NULL) {
 628                 error = ESTALE;
 629                 goto out;
 630         }
 631 
 632         /*
 633          * If the file system is exported read only, it is not appropriate
 634          * to check write permissions for regular files and directories.
 635          * Special files are interpreted by the client, so the underlying
 636          * permissions are sent back to the client for interpretation.
 637          */
 638         if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
 639                 checkwriteperm = 0;
 640         else
 641                 checkwriteperm = 1;
 642 
 643         /*
 644          * We need the mode so that we can correctly determine access
 645          * permissions relative to a mandatory lock file.  Access to
 646          * mandatory lock files is denied on the server, so it might
 647          * as well be reflected to the server during the open.
 648          */
 649         va.va_mask = AT_MODE;
 650         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 651         if (error)
 652                 goto out;
 653 
 654         vap = &va;
 655 
 656         resp->resok.access = 0;
 657 
 658         if (is_system_labeled()) {
 659                 bslabel_t *clabel = req->rq_label;
 660 
 661                 ASSERT(clabel != NULL);
 662                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 663                     "got client label from request(1)", struct svc_req *, req);
 664 
 665                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 666                         if ((equal_label = do_rfs_label_check(clabel, vp,
 667                             EQUALITY_CHECK, exi)) == B_FALSE) {
 668                                 dominant_label = do_rfs_label_check(clabel,
 669                                     vp, DOMINANCE_CHECK, exi);
 670                         } else
 671                                 dominant_label = B_TRUE;
 672                         admin_low_client = B_FALSE;
 673                 } else
 674                         admin_low_client = B_TRUE;
 675         }
 676 
 677         if (args->access & ACCESS3_READ) {
 678                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 679                 if (error) {
 680                         if (curthread->t_flag & T_WOULDBLOCK)
 681                                 goto out;
 682                 } else if (!MANDLOCK(vp, va.va_mode) &&
 683                     (!is_system_labeled() || admin_low_client ||
 684                     dominant_label))
 685                         resp->resok.access |= ACCESS3_READ;
 686         }
 687         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 688                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 689                 if (error) {
 690                         if (curthread->t_flag & T_WOULDBLOCK)
 691                                 goto out;
 692                 } else if (!is_system_labeled() || admin_low_client ||
 693                     dominant_label)
 694                         resp->resok.access |= ACCESS3_LOOKUP;
 695         }
 696         if (checkwriteperm &&
 697             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 698                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 699                 if (error) {
 700                         if (curthread->t_flag & T_WOULDBLOCK)
 701                                 goto out;
 702                 } else if (!MANDLOCK(vp, va.va_mode) &&
 703                     (!is_system_labeled() || admin_low_client || equal_label)) {
 704                         resp->resok.access |=
 705                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 706                 }
 707         }
 708         if (checkwriteperm &&
 709             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 710                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 711                 if (error) {
 712                         if (curthread->t_flag & T_WOULDBLOCK)
 713                                 goto out;
 714                 } else if (!is_system_labeled() || admin_low_client ||
 715                     equal_label)
 716                         resp->resok.access |= ACCESS3_DELETE;
 717         }
 718         if (args->access & ACCESS3_EXECUTE) {
 719                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 720                 if (error) {
 721                         if (curthread->t_flag & T_WOULDBLOCK)
 722                                 goto out;
 723                 } else if (!MANDLOCK(vp, va.va_mode) &&
 724                     (!is_system_labeled() || admin_low_client ||
 725                     dominant_label))
 726                         resp->resok.access |= ACCESS3_EXECUTE;
 727         }
 728 
 729         va.va_mask = AT_ALL;
 730         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 731 
 732         resp->status = NFS3_OK;
 733         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 734 
 735         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 736             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 737             ACCESS3res *, resp);
 738 
 739         VN_RELE(vp);
 740 
 741         return;
 742 
 743 out:
 744         if (curthread->t_flag & T_WOULDBLOCK) {
 745                 curthread->t_flag &= ~T_WOULDBLOCK;
 746                 resp->status = NFS3ERR_JUKEBOX;
 747         } else
 748                 resp->status = puterrno3(error);
 749         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 750             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 751             ACCESS3res *, resp);
 752         if (vp != NULL)
 753                 VN_RELE(vp);
 754         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 755 }
 756 
 757 void *
 758 rfs3_access_getfh(ACCESS3args *args)
 759 {
 760 
 761         return (&args->object);
 762 }
 763 
 764 /* ARGSUSED */
 765 void
 766 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 767     struct svc_req *req, cred_t *cr, bool_t ro)
 768 {
 769         int error;
 770         vnode_t *vp;
 771         struct vattr *vap;
 772         struct vattr va;
 773         struct iovec iov;
 774         struct uio uio;
 775         char *data;
 776         struct sockaddr *ca;
 777         char *name = NULL;
 778         int is_referral = 0;
 779 
 780         vap = NULL;
 781 
 782         vp = nfs3_fhtovp(&args->symlink, exi);
 783 
 784         DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
 785             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 786             READLINK3args *, args);
 787 
 788         if (vp == NULL) {
 789                 error = ESTALE;
 790                 goto out;
 791         }
 792 
 793         va.va_mask = AT_ALL;
 794         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 795         if (error)
 796                 goto out;
 797 
 798         vap = &va;
 799 
 800         /* We lied about the object type for a referral */
 801         if (vn_is_nfs_reparse(vp, cr))
 802                 is_referral = 1;
 803 
 804         if (vp->v_type != VLNK && !is_referral) {
 805                 resp->status = NFS3ERR_INVAL;
 806                 goto out1;
 807         }
 808 
 809         if (MANDLOCK(vp, va.va_mode)) {
 810                 resp->status = NFS3ERR_ACCES;
 811                 goto out1;
 812         }
 813 
 814         if (is_system_labeled()) {
 815                 bslabel_t *clabel = req->rq_label;
 816 
 817                 ASSERT(clabel != NULL);
 818                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 819                     "got client label from request(1)", struct svc_req *, req);
 820 
 821                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 822                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 823                             exi)) {
 824                                 resp->status = NFS3ERR_ACCES;
 825                                 goto out1;
 826                         }
 827                 }
 828         }
 829 
 830         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 831 
 832         if (is_referral) {
 833                 char *s;
 834                 size_t strsz;
 835 
 836                 /* Get an artificial symlink based on a referral */
 837                 s = build_symlink(vp, cr, &strsz);
 838                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 839                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 840                     vnode_t *, vp, char *, s);
 841                 if (s == NULL)
 842                         error = EINVAL;
 843                 else {
 844                         error = 0;
 845                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 846                         kmem_free(s, strsz);
 847                 }
 848 
 849         } else {
 850 
 851                 iov.iov_base = data;
 852                 iov.iov_len = MAXPATHLEN;
 853                 uio.uio_iov = &iov;
 854                 uio.uio_iovcnt = 1;
 855                 uio.uio_segflg = UIO_SYSSPACE;
 856                 uio.uio_extflg = UIO_COPY_CACHED;
 857                 uio.uio_loffset = 0;
 858                 uio.uio_resid = MAXPATHLEN;
 859 
 860                 error = VOP_READLINK(vp, &uio, cr, NULL);
 861 
 862                 if (!error)
 863                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 864         }
 865 
 866         va.va_mask = AT_ALL;
 867         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 868 
 869         /* Lie about object type again just to be consistent */
 870         if (is_referral && vap != NULL)
 871                 vap->va_type = VLNK;
 872 
 873 #if 0 /* notyet */
 874         /*
 875          * Don't do this.  It causes local disk writes when just
 876          * reading the file and the overhead is deemed larger
 877          * than the benefit.
 878          */
 879         /*
 880          * Force modified metadata out to stable storage.
 881          */
 882         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 883 #endif
 884 
 885         if (error) {
 886                 kmem_free(data, MAXPATHLEN + 1);
 887                 goto out;
 888         }
 889 
 890         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 891         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 892             MAXPATHLEN + 1);
 893 
 894         if (name == NULL) {
 895                 /*
 896                  * Even though the conversion failed, we return
 897                  * something. We just don't translate it.
 898                  */
 899                 name = data;
 900         }
 901 
 902         resp->status = NFS3_OK;
 903         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 904         resp->resok.data = name;
 905 
 906         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 907             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 908             READLINK3res *, resp);
 909         VN_RELE(vp);
 910 
 911         if (name != data)
 912                 kmem_free(data, MAXPATHLEN + 1);
 913 
 914         return;
 915 
 916 out:
 917         if (curthread->t_flag & T_WOULDBLOCK) {
 918                 curthread->t_flag &= ~T_WOULDBLOCK;
 919                 resp->status = NFS3ERR_JUKEBOX;
 920         } else
 921                 resp->status = puterrno3(error);
 922 out1:
 923         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 924             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 925             READLINK3res *, resp);
 926         if (vp != NULL)
 927                 VN_RELE(vp);
 928         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 929 }
 930 
 931 void *
 932 rfs3_readlink_getfh(READLINK3args *args)
 933 {
 934 
 935         return (&args->symlink);
 936 }
 937 
 938 void
 939 rfs3_readlink_free(READLINK3res *resp)
 940 {
 941 
 942         if (resp->status == NFS3_OK)
 943                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 944 }
 945 
 946 /*
 947  * Server routine to handle read
 948  * May handle RDMA data as well as mblks
 949  */
 950 /* ARGSUSED */
 951 void
 952 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 953     struct svc_req *req, cred_t *cr, bool_t ro)
 954 {
 955         int error;
 956         vnode_t *vp;
 957         struct vattr *vap;
 958         struct vattr va;
 959         struct iovec iov, *iovp = NULL;
 960         int iovcnt;
 961         struct uio uio;
 962         u_offset_t offset;
 963         mblk_t *mp = NULL;
 964         int in_crit = 0;
 965         int need_rwunlock = 0;
 966         caller_context_t ct;
 967         int rdma_used = 0;
 968         int loaned_buffers;
 969         struct uio *uiop;
 970 
 971         vap = NULL;
 972 
 973         vp = nfs3_fhtovp(&args->file, exi);
 974 
 975         DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
 976             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 977             READ3args *, args);
 978 
 979 
 980         if (vp == NULL) {
 981                 error = ESTALE;
 982                 goto out;
 983         }
 984 
 985         if (args->wlist) {
 986                 if (args->count > clist_len(args->wlist)) {
 987                         error = EINVAL;
 988                         goto out;
 989                 }
 990                 rdma_used = 1;
 991         }
 992 
 993         /* use loaned buffers for TCP */
 994         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
 995 
 996         if (is_system_labeled()) {
 997                 bslabel_t *clabel = req->rq_label;
 998 
 999                 ASSERT(clabel != NULL);
1000                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1001                     "got client label from request(1)", struct svc_req *, req);
1002 
1003                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1004                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1005                             exi)) {
1006                                 resp->status = NFS3ERR_ACCES;
1007                                 goto out1;
1008                         }
1009                 }
1010         }
1011 
1012         ct.cc_sysid = 0;
1013         ct.cc_pid = 0;
1014         ct.cc_caller_id = nfs3_srv_caller_id;
1015         ct.cc_flags = CC_DONTBLOCK;
1016 
1017         /*
1018          * Enter the critical region before calling VOP_RWLOCK
1019          * to avoid a deadlock with write requests.
1020          */
1021         if (nbl_need_check(vp)) {
1022                 nbl_start_crit(vp, RW_READER);
1023                 in_crit = 1;
1024                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1025                     NULL)) {
1026                         error = EACCES;
1027                         goto out;
1028                 }
1029         }
1030 
1031         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1032 
1033         /* check if a monitor detected a delegation conflict */
1034         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1035                 resp->status = NFS3ERR_JUKEBOX;
1036                 goto out1;
1037         }
1038 
1039         need_rwunlock = 1;
1040 
1041         va.va_mask = AT_ALL;
1042         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1043 
1044         /*
1045          * If we can't get the attributes, then we can't do the
1046          * right access checking.  So, we'll fail the request.
1047          */
1048         if (error)
1049                 goto out;
1050 
1051         vap = &va;
1052 
1053         if (vp->v_type != VREG) {
1054                 resp->status = NFS3ERR_INVAL;
1055                 goto out1;
1056         }
1057 
1058         if (crgetuid(cr) != va.va_uid) {
1059                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1060                 if (error) {
1061                         if (curthread->t_flag & T_WOULDBLOCK)
1062                                 goto out;
1063                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1064                         if (error)
1065                                 goto out;
1066                 }
1067         }
1068 
1069         if (MANDLOCK(vp, va.va_mode)) {
1070                 resp->status = NFS3ERR_ACCES;
1071                 goto out1;
1072         }
1073 
1074         offset = args->offset;
1075         if (offset >= va.va_size) {
1076                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1077                 if (in_crit)
1078                         nbl_end_crit(vp);
1079                 resp->status = NFS3_OK;
1080                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1081                 resp->resok.count = 0;
1082                 resp->resok.eof = TRUE;
1083                 resp->resok.data.data_len = 0;
1084                 resp->resok.data.data_val = NULL;
1085                 resp->resok.data.mp = NULL;
1086                 /* RDMA */
1087                 resp->resok.wlist = args->wlist;
1088                 resp->resok.wlist_len = resp->resok.count;
1089                 if (resp->resok.wlist)
1090                         clist_zero_len(resp->resok.wlist);
1091                 goto done;
1092         }
1093 
1094         if (args->count == 0) {
1095                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1096                 if (in_crit)
1097                         nbl_end_crit(vp);
1098                 resp->status = NFS3_OK;
1099                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1100                 resp->resok.count = 0;
1101                 resp->resok.eof = FALSE;
1102                 resp->resok.data.data_len = 0;
1103                 resp->resok.data.data_val = NULL;
1104                 resp->resok.data.mp = NULL;
1105                 /* RDMA */
1106                 resp->resok.wlist = args->wlist;
1107                 resp->resok.wlist_len = resp->resok.count;
1108                 if (resp->resok.wlist)
1109                         clist_zero_len(resp->resok.wlist);
1110                 goto done;
1111         }
1112 
1113         /*
1114          * do not allocate memory more the max. allowed
1115          * transfer size
1116          */
1117         if (args->count > rfs3_tsize(req))
1118                 args->count = rfs3_tsize(req);
1119 
1120         if (loaned_buffers) {
1121                 uiop = (uio_t *)rfs_setup_xuio(vp);
1122                 ASSERT(uiop != NULL);
1123                 uiop->uio_segflg = UIO_SYSSPACE;
1124                 uiop->uio_loffset = args->offset;
1125                 uiop->uio_resid = args->count;
1126 
1127                 /* Jump to do the read if successful */
1128                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1129                         /*
1130                          * Need to hold the vnode until after VOP_RETZCBUF()
1131                          * is called.
1132                          */
1133                         VN_HOLD(vp);
1134                         goto doio_read;
1135                 }
1136 
1137                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1138                     uiop->uio_loffset, int, uiop->uio_resid);
1139 
1140                 uiop->uio_extflg = 0;
1141                 /* failure to setup for zero copy */
1142                 rfs_free_xuio((void *)uiop);
1143                 loaned_buffers = 0;
1144         }
1145 
1146         /*
1147          * If returning data via RDMA Write, then grab the chunk list.
1148          * If we aren't returning READ data w/RDMA_WRITE, then grab
1149          * a mblk.
1150          */
1151         if (rdma_used) {
1152                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1153                 uio.uio_iov = &iov;
1154                 uio.uio_iovcnt = 1;
1155         } else {
1156                 /*
1157                  * mp will contain the data to be sent out in the read reply.
1158                  * For UDP, this will be freed after the reply has been sent
1159                  * out by the driver.  For TCP, it will be freed after the last
1160                  * segment associated with the reply has been ACKed by the
1161                  * client.
1162                  */
1163                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1164                 uio.uio_iov = iovp;
1165                 uio.uio_iovcnt = iovcnt;
1166         }
1167 
1168         uio.uio_segflg = UIO_SYSSPACE;
1169         uio.uio_extflg = UIO_COPY_CACHED;
1170         uio.uio_loffset = args->offset;
1171         uio.uio_resid = args->count;
1172         uiop = &uio;
1173 
1174 doio_read:
1175         error = VOP_READ(vp, uiop, 0, cr, &ct);
1176 
1177         if (error) {
1178                 if (mp)
1179                         freemsg(mp);
1180                 /* check if a monitor detected a delegation conflict */
1181                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1182                         resp->status = NFS3ERR_JUKEBOX;
1183                         goto out1;
1184                 }
1185                 goto out;
1186         }
1187 
1188         /* make mblk using zc buffers */
1189         if (loaned_buffers) {
1190                 mp = uio_to_mblk(uiop);
1191                 ASSERT(mp != NULL);
1192         }
1193 
1194         va.va_mask = AT_ALL;
1195         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1196 
1197         if (error)
1198                 vap = NULL;
1199         else
1200                 vap = &va;
1201 
1202         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1203 
1204         if (in_crit)
1205                 nbl_end_crit(vp);
1206 
1207         resp->status = NFS3_OK;
1208         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1209         resp->resok.count = args->count - uiop->uio_resid;
1210         if (!error && offset + resp->resok.count == va.va_size)
1211                 resp->resok.eof = TRUE;
1212         else
1213                 resp->resok.eof = FALSE;
1214         resp->resok.data.data_len = resp->resok.count;
1215 
1216         if (mp)
1217                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1218 
1219         resp->resok.data.mp = mp;
1220         resp->resok.size = (uint_t)args->count;
1221 
1222         if (rdma_used) {
1223                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1224                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1225                         resp->status = NFS3ERR_INVAL;
1226                 }
1227         } else {
1228                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1229                 (resp->resok).wlist = NULL;
1230         }
1231 
1232 done:
1233         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1234             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1235             READ3res *, resp);
1236 
1237         VN_RELE(vp);
1238 
1239         if (iovp != NULL)
1240                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1241 
1242         return;
1243 
1244 out:
1245         if (curthread->t_flag & T_WOULDBLOCK) {
1246                 curthread->t_flag &= ~T_WOULDBLOCK;
1247                 resp->status = NFS3ERR_JUKEBOX;
1248         } else
1249                 resp->status = puterrno3(error);
1250 out1:
1251         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1252             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1253             READ3res *, resp);
1254 
1255         if (vp != NULL) {
1256                 if (need_rwunlock)
1257                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1258                 if (in_crit)
1259                         nbl_end_crit(vp);
1260                 VN_RELE(vp);
1261         }
1262         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1263 
1264         if (iovp != NULL)
1265                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1266 }
1267 
1268 void
1269 rfs3_read_free(READ3res *resp)
1270 {
1271         mblk_t *mp;
1272 
1273         if (resp->status == NFS3_OK) {
1274                 mp = resp->resok.data.mp;
1275                 if (mp != NULL)
1276                         freemsg(mp);
1277         }
1278 }
1279 
1280 void *
1281 rfs3_read_getfh(READ3args *args)
1282 {
1283 
1284         return (&args->file);
1285 }
1286 
1287 #define MAX_IOVECS      12
1288 
1289 #ifdef DEBUG
1290 static int rfs3_write_hits = 0;
1291 static int rfs3_write_misses = 0;
1292 #endif
1293 
1294 void
1295 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1296     struct svc_req *req, cred_t *cr, bool_t ro)
1297 {
1298         nfs3_srv_t *ns;
1299         int error;
1300         vnode_t *vp;
1301         struct vattr *bvap = NULL;
1302         struct vattr bva;
1303         struct vattr *avap = NULL;
1304         struct vattr ava;
1305         u_offset_t rlimit;
1306         struct uio uio;
1307         struct iovec iov[MAX_IOVECS];
1308         mblk_t *m;
1309         struct iovec *iovp;
1310         int iovcnt;
1311         int ioflag;
1312         cred_t *savecred;
1313         int in_crit = 0;
1314         int rwlock_ret = -1;
1315         caller_context_t ct;
1316 
1317         vp = nfs3_fhtovp(&args->file, exi);
1318 
1319         DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1320             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1321             WRITE3args *, args);
1322 
1323         if (vp == NULL) {
1324                 error = ESTALE;
1325                 goto err;
1326         }
1327 
1328         ns = zone_getspecific(rfs3_zone_key, curzone);
1329         if (is_system_labeled()) {
1330                 bslabel_t *clabel = req->rq_label;
1331 
1332                 ASSERT(clabel != NULL);
1333                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1334                     "got client label from request(1)", struct svc_req *, req);
1335 
1336                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1337                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1338                             exi)) {
1339                                 resp->status = NFS3ERR_ACCES;
1340                                 goto err1;
1341                         }
1342                 }
1343         }
1344 
1345         ct.cc_sysid = 0;
1346         ct.cc_pid = 0;
1347         ct.cc_caller_id = nfs3_srv_caller_id;
1348         ct.cc_flags = CC_DONTBLOCK;
1349 
1350         /*
1351          * We have to enter the critical region before calling VOP_RWLOCK
1352          * to avoid a deadlock with ufs.
1353          */
1354         if (nbl_need_check(vp)) {
1355                 nbl_start_crit(vp, RW_READER);
1356                 in_crit = 1;
1357                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1358                     NULL)) {
1359                         error = EACCES;
1360                         goto err;
1361                 }
1362         }
1363 
1364         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1365 
1366         /* check if a monitor detected a delegation conflict */
1367         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1368                 resp->status = NFS3ERR_JUKEBOX;
1369                 rwlock_ret = -1;
1370                 goto err1;
1371         }
1372 
1373 
1374         bva.va_mask = AT_ALL;
1375         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1376 
1377         /*
1378          * If we can't get the attributes, then we can't do the
1379          * right access checking.  So, we'll fail the request.
1380          */
1381         if (error)
1382                 goto err;
1383 
1384         bvap = &bva;
1385         avap = bvap;
1386 
1387         if (args->count != args->data.data_len) {
1388                 resp->status = NFS3ERR_INVAL;
1389                 goto err1;
1390         }
1391 
1392         if (rdonly(ro, vp)) {
1393                 resp->status = NFS3ERR_ROFS;
1394                 goto err1;
1395         }
1396 
1397         if (vp->v_type != VREG) {
1398                 resp->status = NFS3ERR_INVAL;
1399                 goto err1;
1400         }
1401 
1402         if (crgetuid(cr) != bva.va_uid &&
1403             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1404                 goto err;
1405 
1406         if (MANDLOCK(vp, bva.va_mode)) {
1407                 resp->status = NFS3ERR_ACCES;
1408                 goto err1;
1409         }
1410 
1411         if (args->count == 0) {
1412                 resp->status = NFS3_OK;
1413                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1414                 resp->resok.count = 0;
1415                 resp->resok.committed = args->stable;
1416                 resp->resok.verf = ns->write3verf;
1417                 goto out;
1418         }
1419 
1420         if (args->mblk != NULL) {
1421                 iovcnt = 0;
1422                 for (m = args->mblk; m != NULL; m = m->b_cont)
1423                         iovcnt++;
1424                 if (iovcnt <= MAX_IOVECS) {
1425 #ifdef DEBUG
1426                         rfs3_write_hits++;
1427 #endif
1428                         iovp = iov;
1429                 } else {
1430 #ifdef DEBUG
1431                         rfs3_write_misses++;
1432 #endif
1433                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1434                 }
1435                 mblk_to_iov(args->mblk, iovcnt, iovp);
1436 
1437         } else if (args->rlist != NULL) {
1438                 iovcnt = 1;
1439                 iovp = iov;
1440                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1441                 iovp->iov_len = args->count;
1442         } else {
1443                 iovcnt = 1;
1444                 iovp = iov;
1445                 iovp->iov_base = args->data.data_val;
1446                 iovp->iov_len = args->count;
1447         }
1448 
1449         uio.uio_iov = iovp;
1450         uio.uio_iovcnt = iovcnt;
1451 
1452         uio.uio_segflg = UIO_SYSSPACE;
1453         uio.uio_extflg = UIO_COPY_DEFAULT;
1454         uio.uio_loffset = args->offset;
1455         uio.uio_resid = args->count;
1456         uio.uio_llimit = curproc->p_fsz_ctl;
1457         rlimit = uio.uio_llimit - args->offset;
1458         if (rlimit < (u_offset_t)uio.uio_resid)
1459                 uio.uio_resid = (int)rlimit;
1460 
1461         if (args->stable == UNSTABLE)
1462                 ioflag = 0;
1463         else if (args->stable == FILE_SYNC)
1464                 ioflag = FSYNC;
1465         else if (args->stable == DATA_SYNC)
1466                 ioflag = FDSYNC;
1467         else {
1468                 if (iovp != iov)
1469                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1470                 resp->status = NFS3ERR_INVAL;
1471                 goto err1;
1472         }
1473 
1474         /*
1475          * We're changing creds because VM may fault and we need
1476          * the cred of the current thread to be used if quota
1477          * checking is enabled.
1478          */
1479         savecred = curthread->t_cred;
1480         curthread->t_cred = cr;
1481         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1482         curthread->t_cred = savecred;
1483 
1484         if (iovp != iov)
1485                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1486 
1487         /* check if a monitor detected a delegation conflict */
1488         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1489                 resp->status = NFS3ERR_JUKEBOX;
1490                 goto err1;
1491         }
1492 
1493         ava.va_mask = AT_ALL;
1494         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1495 
1496         if (error)
1497                 goto err;
1498 
1499         /*
1500          * If we were unable to get the V_WRITELOCK_TRUE, then we
1501          * may not have accurate after attrs, so check if
1502          * we have both attributes, they have a non-zero va_seq, and
1503          * va_seq has changed by exactly one,
1504          * if not, turn off the before attr.
1505          */
1506         if (rwlock_ret != V_WRITELOCK_TRUE) {
1507                 if (bvap == NULL || avap == NULL ||
1508                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1509                     avap->va_seq != (bvap->va_seq + 1)) {
1510                         bvap = NULL;
1511                 }
1512         }
1513 
1514         resp->status = NFS3_OK;
1515         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1516         resp->resok.count = args->count - uio.uio_resid;
1517         resp->resok.committed = args->stable;
1518         resp->resok.verf = ns->write3verf;
1519         goto out;
1520 
1521 err:
1522         if (curthread->t_flag & T_WOULDBLOCK) {
1523                 curthread->t_flag &= ~T_WOULDBLOCK;
1524                 resp->status = NFS3ERR_JUKEBOX;
1525         } else
1526                 resp->status = puterrno3(error);
1527 err1:
1528         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1529 out:
1530         DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1531             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1532             WRITE3res *, resp);
1533 
1534         if (vp != NULL) {
1535                 if (rwlock_ret != -1)
1536                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1537                 if (in_crit)
1538                         nbl_end_crit(vp);
1539                 VN_RELE(vp);
1540         }
1541 }
1542 
1543 void *
1544 rfs3_write_getfh(WRITE3args *args)
1545 {
1546 
1547         return (&args->file);
1548 }
1549 
1550 void
1551 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1552     struct svc_req *req, cred_t *cr, bool_t ro)
1553 {
1554         int error;
1555         int in_crit = 0;
1556         vnode_t *vp;
1557         vnode_t *tvp = NULL;
1558         vnode_t *dvp;
1559         struct vattr *vap;
1560         struct vattr va;
1561         struct vattr *dbvap;
1562         struct vattr dbva;
1563         struct vattr *davap;
1564         struct vattr dava;
1565         enum vcexcl excl;
1566         nfstime3 *mtime;
1567         len_t reqsize;
1568         bool_t trunc;
1569         struct sockaddr *ca;
1570         char *name = NULL;
1571 
1572         dbvap = NULL;
1573         davap = NULL;
1574 
1575         dvp = nfs3_fhtovp(&args->where.dir, exi);
1576 
1577         DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1578             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1579             CREATE3args *, args);
1580 
1581         if (dvp == NULL) {
1582                 error = ESTALE;
1583                 goto out;
1584         }
1585 
1586         dbva.va_mask = AT_ALL;
1587         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1588         davap = dbvap;
1589 
1590         if (args->where.name == nfs3nametoolong) {
1591                 resp->status = NFS3ERR_NAMETOOLONG;
1592                 goto out1;
1593         }
1594 
1595         if (args->where.name == NULL || *(args->where.name) == '\0') {
1596                 resp->status = NFS3ERR_ACCES;
1597                 goto out1;
1598         }
1599 
1600         if (rdonly(ro, dvp)) {
1601                 resp->status = NFS3ERR_ROFS;
1602                 goto out1;
1603         }
1604 
1605         if (is_system_labeled()) {
1606                 bslabel_t *clabel = req->rq_label;
1607 
1608                 ASSERT(clabel != NULL);
1609                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1610                     "got client label from request(1)", struct svc_req *, req);
1611 
1612                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1613                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1614                             exi)) {
1615                                 resp->status = NFS3ERR_ACCES;
1616                                 goto out1;
1617                         }
1618                 }
1619         }
1620 
1621         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1622         name = nfscmd_convname(ca, exi, args->where.name,
1623             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1624 
1625         if (name == NULL) {
1626                 /* This is really a Solaris EILSEQ */
1627                 resp->status = NFS3ERR_INVAL;
1628                 goto out1;
1629         }
1630 
1631         if (args->how.mode == EXCLUSIVE) {
1632                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1633                 va.va_type = VREG;
1634                 va.va_mode = (mode_t)0;
1635                 /*
1636                  * Ensure no time overflows and that types match
1637                  */
1638                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1639                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1640                 va.va_mtime.tv_nsec = mtime->nseconds;
1641                 excl = EXCL;
1642         } else {
1643                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1644                     &va);
1645                 if (error)
1646                         goto out;
1647                 va.va_mask |= AT_TYPE;
1648                 va.va_type = VREG;
1649                 if (args->how.mode == GUARDED)
1650                         excl = EXCL;
1651                 else {
1652                         excl = NONEXCL;
1653 
1654                         /*
1655                          * During creation of file in non-exclusive mode
1656                          * if size of file is being set then make sure
1657                          * that if the file already exists that no conflicting
1658                          * non-blocking mandatory locks exists in the region
1659                          * being modified. If there are conflicting locks fail
1660                          * the operation with EACCES.
1661                          */
1662                         if (va.va_mask & AT_SIZE) {
1663                                 struct vattr tva;
1664 
1665                                 /*
1666                                  * Does file already exist?
1667                                  */
1668                                 error = VOP_LOOKUP(dvp, name, &tvp,
1669                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1670 
1671                                 /*
1672                                  * Check to see if the file has been delegated
1673                                  * to a v4 client.  If so, then begin recall of
1674                                  * the delegation and return JUKEBOX to allow
1675                                  * the client to retrasmit its request.
1676                                  */
1677 
1678                                 trunc = va.va_size == 0;
1679                                 if (!error &&
1680                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1681                                         resp->status = NFS3ERR_JUKEBOX;
1682                                         goto out1;
1683                                 }
1684 
1685                                 /*
1686                                  * Check for NBMAND lock conflicts
1687                                  */
1688                                 if (!error && nbl_need_check(tvp)) {
1689                                         u_offset_t offset;
1690                                         ssize_t len;
1691 
1692                                         nbl_start_crit(tvp, RW_READER);
1693                                         in_crit = 1;
1694 
1695                                         tva.va_mask = AT_SIZE;
1696                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1697                                             NULL);
1698                                         /*
1699                                          * Can't check for conflicts, so return
1700                                          * error.
1701                                          */
1702                                         if (error)
1703                                                 goto out;
1704 
1705                                         offset = tva.va_size < va.va_size ?
1706                                             tva.va_size : va.va_size;
1707                                         len = tva.va_size < va.va_size ?
1708                                             va.va_size - tva.va_size :
1709                                             tva.va_size - va.va_size;
1710                                         if (nbl_conflict(tvp, NBL_WRITE,
1711                                             offset, len, 0, NULL)) {
1712                                                 error = EACCES;
1713                                                 goto out;
1714                                         }
1715                                 } else if (tvp) {
1716                                         VN_RELE(tvp);
1717                                         tvp = NULL;
1718                                 }
1719                         }
1720                 }
1721                 if (va.va_mask & AT_SIZE)
1722                         reqsize = va.va_size;
1723         }
1724 
1725         /*
1726          * Must specify the mode.
1727          */
1728         if (!(va.va_mask & AT_MODE)) {
1729                 resp->status = NFS3ERR_INVAL;
1730                 goto out1;
1731         }
1732 
1733         /*
1734          * If the filesystem is exported with nosuid, then mask off
1735          * the setuid and setgid bits.
1736          */
1737         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1738                 va.va_mode &= ~(VSUID | VSGID);
1739 
1740 tryagain:
1741         /*
1742          * The file open mode used is VWRITE.  If the client needs
1743          * some other semantic, then it should do the access checking
1744          * itself.  It would have been nice to have the file open mode
1745          * passed as part of the arguments.
1746          */
1747         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1748             &vp, cr, 0, NULL, NULL);
1749 
1750         dava.va_mask = AT_ALL;
1751         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1752 
1753         if (error) {
1754                 /*
1755                  * If we got something other than file already exists
1756                  * then just return this error.  Otherwise, we got
1757                  * EEXIST.  If we were doing a GUARDED create, then
1758                  * just return this error.  Otherwise, we need to
1759                  * make sure that this wasn't a duplicate of an
1760                  * exclusive create request.
1761                  *
1762                  * The assumption is made that a non-exclusive create
1763                  * request will never return EEXIST.
1764                  */
1765                 if (error != EEXIST || args->how.mode == GUARDED)
1766                         goto out;
1767                 /*
1768                  * Lookup the file so that we can get a vnode for it.
1769                  */
1770                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1771                     NULL, cr, NULL, NULL, NULL);
1772                 if (error) {
1773                         /*
1774                          * We couldn't find the file that we thought that
1775                          * we just created.  So, we'll just try creating
1776                          * it again.
1777                          */
1778                         if (error == ENOENT)
1779                                 goto tryagain;
1780                         goto out;
1781                 }
1782 
1783                 /*
1784                  * If the file is delegated to a v4 client, go ahead
1785                  * and initiate recall, this create is a hint that a
1786                  * conflicting v3 open has occurred.
1787                  */
1788 
1789                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1790                         VN_RELE(vp);
1791                         resp->status = NFS3ERR_JUKEBOX;
1792                         goto out1;
1793                 }
1794 
1795                 va.va_mask = AT_ALL;
1796                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1797 
1798                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1799                 /* % with INT32_MAX to prevent overflows */
1800                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1801                     vap->va_mtime.tv_sec !=
1802                     (mtime->seconds % INT32_MAX) ||
1803                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1804                         VN_RELE(vp);
1805                         error = EEXIST;
1806                         goto out;
1807                 }
1808         } else {
1809 
1810                 if ((args->how.mode == UNCHECKED ||
1811                     args->how.mode == GUARDED) &&
1812                     args->how.createhow3_u.obj_attributes.size.set_it &&
1813                     va.va_size == 0)
1814                         trunc = TRUE;
1815                 else
1816                         trunc = FALSE;
1817 
1818                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1819                         VN_RELE(vp);
1820                         resp->status = NFS3ERR_JUKEBOX;
1821                         goto out1;
1822                 }
1823 
1824                 va.va_mask = AT_ALL;
1825                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1826 
1827                 /*
1828                  * We need to check to make sure that the file got
1829                  * created to the indicated size.  If not, we do a
1830                  * setattr to try to change the size, but we don't
1831                  * try too hard.  This shouldn't be a problem as most
1832                  * clients will only specify a size of zero which
1833                  * local file systems handle.  However, even if
1834                  * the client does specify a non-zero size, it can
1835                  * still recover by checking the size of the file
1836                  * after it has created it and then issue a setattr
1837                  * request of its own to set the size of the file.
1838                  */
1839                 if (vap != NULL &&
1840                     (args->how.mode == UNCHECKED ||
1841                     args->how.mode == GUARDED) &&
1842                     args->how.createhow3_u.obj_attributes.size.set_it &&
1843                     vap->va_size != reqsize) {
1844                         va.va_mask = AT_SIZE;
1845                         va.va_size = reqsize;
1846                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1847                         va.va_mask = AT_ALL;
1848                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1849                 }
1850         }
1851 
1852         if (name != args->where.name)
1853                 kmem_free(name, MAXPATHLEN + 1);
1854 
1855         error = makefh3(&resp->resok.obj.handle, vp, exi);
1856         if (error)
1857                 resp->resok.obj.handle_follows = FALSE;
1858         else
1859                 resp->resok.obj.handle_follows = TRUE;
1860 
1861         /*
1862          * Force modified data and metadata out to stable storage.
1863          */
1864         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1865         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1866 
1867         VN_RELE(vp);
1868         if (tvp != NULL) {
1869                 if (in_crit)
1870                         nbl_end_crit(tvp);
1871                 VN_RELE(tvp);
1872         }
1873 
1874         resp->status = NFS3_OK;
1875         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1876         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1877 
1878         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1879             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1880             CREATE3res *, resp);
1881 
1882         VN_RELE(dvp);
1883         return;
1884 
1885 out:
1886         if (curthread->t_flag & T_WOULDBLOCK) {
1887                 curthread->t_flag &= ~T_WOULDBLOCK;
1888                 resp->status = NFS3ERR_JUKEBOX;
1889         } else
1890                 resp->status = puterrno3(error);
1891 out1:
1892         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1893             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1894             CREATE3res *, resp);
1895 
1896         if (name != NULL && name != args->where.name)
1897                 kmem_free(name, MAXPATHLEN + 1);
1898 
1899         if (tvp != NULL) {
1900                 if (in_crit)
1901                         nbl_end_crit(tvp);
1902                 VN_RELE(tvp);
1903         }
1904         if (dvp != NULL)
1905                 VN_RELE(dvp);
1906         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1907 }
1908 
/*
 * Return the address of the directory file handle (where.dir) embedded in
 * the CREATE3 arguments.
 */
1909 void *
1910 rfs3_create_getfh(CREATE3args *args)
1911 {
1912 
1913         return (&args->where.dir);
1914 }
1915 
/*
 * NFSv3 MKDIR: create the directory args->where.name inside the directory
 * named by the file handle args->where.dir.  The outcome is stored in *resp.
 *
 * On success resp->status is NFS3_OK and resok carries the new object's
 * file handle (handle_follows is FALSE when makefh3() fails), its post-op
 * attributes and the parent directory's wcc data.  On failure resp->status
 * holds the NFS3 error and resfail.dir_wcc the parent's wcc data.
 *
 * The request is rejected before VOP_MKDIR() is attempted when:
 *   - the file handle does not map to a vnode (ESTALE);
 *   - the name is the nfs3nametoolong sentinel (NFS3ERR_NAMETOOLONG);
 *   - the name is NULL or empty (NFS3ERR_ACCES);
 *   - rdonly() reports the directory as read-only (NFS3ERR_ROFS);
 *   - on a labeled system, the client label check fails (NFS3ERR_ACCES);
 *   - the attributes cannot be converted, or carry no mode (NFS3ERR_INVAL);
 *   - nfscmd_convname() returns NULL (NFS3ERR_INVAL).
 *
 * Exit labels:
 *   out  - resp->status is derived from error, or is NFS3ERR_JUKEBOX when
 *          T_WOULDBLOCK is set on the current thread (the flag is cleared)
 *   out1 - resp->status was already set by the caller of the goto
 *
 * NOTE(review): out1 fires the op__mkdir__done probe before
 * resfail.dir_wcc is filled in, whereas rfs3_symlink() and rfs3_remove()
 * fill it in first -- confirm whether the ordering here is intended.
 */
1916 void
1917 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1918     struct svc_req *req, cred_t *cr, bool_t ro)
1919 {
1920         int error;
1921         vnode_t *vp = NULL;
1922         vnode_t *dvp;
1923         struct vattr *vap;
1924         struct vattr va;
1925         struct vattr *dbvap;
1926         struct vattr dbva;
1927         struct vattr *davap;
1928         struct vattr dava;
1929         struct sockaddr *ca;
1930         char *name = NULL;
1931 
1932         dbvap = NULL;
1933         davap = NULL;
1934 
1935         dvp = nfs3_fhtovp(&args->where.dir, exi);
1936 
1937         DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1938             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1939             MKDIR3args *, args);
1940 
1941         if (dvp == NULL) {
1942                 error = ESTALE;
1943                 goto out;
1944         }
1945 
1946         dbva.va_mask = AT_ALL;
1947         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1948         davap = dbvap;  /* until re-read after VOP_MKDIR */
1949 
1950         if (args->where.name == nfs3nametoolong) {
1951                 resp->status = NFS3ERR_NAMETOOLONG;
1952                 goto out1;
1953         }
1954 
1955         if (args->where.name == NULL || *(args->where.name) == '\0') {
1956                 resp->status = NFS3ERR_ACCES;
1957                 goto out1;
1958         }
1959 
1960         if (rdonly(ro, dvp)) {
1961                 resp->status = NFS3ERR_ROFS;
1962                 goto out1;
1963         }
1964 
1965         if (is_system_labeled()) {
1966                 bslabel_t *clabel = req->rq_label;
1967 
1968                 ASSERT(clabel != NULL);
1969                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1970                     "got client label from request(1)", struct svc_req *, req);
1971 
1972                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1973                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1974                             exi)) {
1975                                 resp->status = NFS3ERR_ACCES;
1976                                 goto out1;
1977                         }
1978                 }
1979         }
1980 
1981         error = sattr3_to_vattr(&args->attributes, &va);
1982         if (error)
1983                 goto out;
1984 
1985         if (!(va.va_mask & AT_MODE)) {
1986                 resp->status = NFS3ERR_INVAL;
1987                 goto out1;
1988         }
1989 
1990         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1991         name = nfscmd_convname(ca, exi, args->where.name,
1992             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1993 
1994         if (name == NULL) {
1995                 resp->status = NFS3ERR_INVAL;
1996                 goto out1;
1997         }
1998 
1999         va.va_mask |= AT_TYPE;
2000         va.va_type = VDIR;
2001 
2002         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2003 
2004         if (name != args->where.name)
2005                 kmem_free(name, MAXPATHLEN + 1);
2006 
2007         dava.va_mask = AT_ALL;
2008         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2009 
2010         /*
2011          * Force modified data and metadata out to stable storage.
2012          */
2013         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2014 
2015         if (error)
2016                 goto out;
2017 
2018         error = makefh3(&resp->resok.obj.handle, vp, exi);
2019         if (error)
2020                 resp->resok.obj.handle_follows = FALSE;
2021         else
2022                 resp->resok.obj.handle_follows = TRUE;
2023 
2024         va.va_mask = AT_ALL;
2025         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2026 
2027         /*
2028          * Force modified data and metadata out to stable storage.
2029          */
2030         (void) VOP_FSYNC(vp, 0, cr, NULL);
2031 
2032         VN_RELE(vp);
2033 
2034         resp->status = NFS3_OK;
2035         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2036         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2037 
2038         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2039             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2040             MKDIR3res *, resp);
2041         VN_RELE(dvp);
2042 
2043         return;
2044 
2045 out:
2046         if (curthread->t_flag & T_WOULDBLOCK) {
2047                 curthread->t_flag &= ~T_WOULDBLOCK;
2048                 resp->status = NFS3ERR_JUKEBOX;
2049         } else
2050                 resp->status = puterrno3(error);
2051 out1:
2052         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2053             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2054             MKDIR3res *, resp);
2055         if (dvp != NULL)
2056                 VN_RELE(dvp);
2057         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2058 }
2059 
/*
 * Return the address of the directory file handle (where.dir) embedded in
 * the MKDIR3 arguments.
 */
2060 void *
2061 rfs3_mkdir_getfh(MKDIR3args *args)
2062 {
2063 
2064         return (&args->where.dir);
2065 }
2066 
/*
 * NFSv3 SYMLINK: create the symbolic link args->where.name, with contents
 * args->symlink.symlink_data, in the directory named by the file handle
 * args->where.dir.  The outcome is stored in *resp.
 *
 * Both the link name and the link data go through nfscmd_convname(); the
 * converted buffers are freed at "out" whenever they differ from the
 * pointers supplied in args.
 *
 * After a successful VOP_SYMLINK() the new name is looked up to obtain a
 * vnode for the file handle and attributes.  If that lookup fails the
 * reply is still NFS3_OK, but carries no handle and no object attributes.
 *
 * Exit labels:
 *   err  - resp->status is derived from error, or is NFS3ERR_JUKEBOX when
 *          T_WOULDBLOCK is set on the current thread (the flag is cleared)
 *   err1 - resp->status was already set; fills in resfail.dir_wcc
 *   out  - common cleanup: frees the converted strings, fires the done
 *          probe and releases dvp
 */
2067 void
2068 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2069     struct svc_req *req, cred_t *cr, bool_t ro)
2070 {
2071         int error;
2072         vnode_t *vp;
2073         vnode_t *dvp;
2074         struct vattr *vap;
2075         struct vattr va;
2076         struct vattr *dbvap;
2077         struct vattr dbva;
2078         struct vattr *davap;
2079         struct vattr dava;
2080         struct sockaddr *ca;
2081         char *name = NULL;
2082         char *symdata = NULL;
2083 
2084         dbvap = NULL;
2085         davap = NULL;
2086 
2087         dvp = nfs3_fhtovp(&args->where.dir, exi);
2088 
2089         DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2090             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2091             SYMLINK3args *, args);
2092 
2093         if (dvp == NULL) {
2094                 error = ESTALE;
2095                 goto err;
2096         }
2097 
2098         dbva.va_mask = AT_ALL;
2099         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2100         davap = dbvap;  /* until re-read after VOP_SYMLINK */
2101 
2102         if (args->where.name == nfs3nametoolong) {
2103                 resp->status = NFS3ERR_NAMETOOLONG;
2104                 goto err1;
2105         }
2106 
2107         if (args->where.name == NULL || *(args->where.name) == '\0') {
2108                 resp->status = NFS3ERR_ACCES;
2109                 goto err1;
2110         }
2111 
2112         if (rdonly(ro, dvp)) {
2113                 resp->status = NFS3ERR_ROFS;
2114                 goto err1;
2115         }
2116 
2117         if (is_system_labeled()) {
2118                 bslabel_t *clabel = req->rq_label;
2119 
2120                 ASSERT(clabel != NULL);
2121                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2122                     "got client label from request(1)", struct svc_req *, req);
2123 
2124                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2125                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2126                             exi)) {
2127                                 resp->status = NFS3ERR_ACCES;
2128                                 goto err1;
2129                         }
2130                 }
2131         }
2132 
2133         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2134         if (error)
2135                 goto err;
2136 
2137         if (!(va.va_mask & AT_MODE)) {
2138                 resp->status = NFS3ERR_INVAL;
2139                 goto err1;
2140         }
2141 
2142         if (args->symlink.symlink_data == nfs3nametoolong) {
2143                 resp->status = NFS3ERR_NAMETOOLONG;
2144                 goto err1;
2145         }
2146 
2147         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2148         name = nfscmd_convname(ca, exi, args->where.name,
2149             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2150 
2151         if (name == NULL) {
2152                 /* This is really a Solaris EILSEQ */
2153                 resp->status = NFS3ERR_INVAL;
2154                 goto err1;
2155         }
2156 
2157         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2158             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2159         if (symdata == NULL) {
2160                 /* This is really a Solaris EILSEQ */
2161                 resp->status = NFS3ERR_INVAL;
2162                 goto err1;
2163         }
2164 
2165 
2166         va.va_mask |= AT_TYPE;
2167         va.va_type = VLNK;
2168 
2169         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2170 
2171         dava.va_mask = AT_ALL;
2172         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2173 
2174         if (error)
2175                 goto err;
2176 
2177         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2178             NULL, NULL, NULL);
2179 
2180         /*
2181          * Force modified data and metadata out to stable storage.
2182          */
2183         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2184 
2185 
2186         resp->status = NFS3_OK;
2187         if (error) {
2188                 resp->resok.obj.handle_follows = FALSE;
2189                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2190                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2191                 goto out;
2192         }
2193 
2194         error = makefh3(&resp->resok.obj.handle, vp, exi);
2195         if (error)
2196                 resp->resok.obj.handle_follows = FALSE;
2197         else
2198                 resp->resok.obj.handle_follows = TRUE;
2199 
2200         va.va_mask = AT_ALL;
2201         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2202 
2203         /*
2204          * Force modified data and metadata out to stable storage.
2205          */
2206         (void) VOP_FSYNC(vp, 0, cr, NULL);
2207 
2208         VN_RELE(vp);
2209 
2210         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2211         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2212         goto out;
2213 
2214 err:
2215         if (curthread->t_flag & T_WOULDBLOCK) {
2216                 curthread->t_flag &= ~T_WOULDBLOCK;
2217                 resp->status = NFS3ERR_JUKEBOX;
2218         } else
2219                 resp->status = puterrno3(error);
2220 err1:
2221         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2222 out:
2223         if (name != NULL && name != args->where.name)
2224                 kmem_free(name, MAXPATHLEN + 1);
2225         if (symdata != NULL && symdata != args->symlink.symlink_data)
2226                 kmem_free(symdata, MAXPATHLEN + 1);
2227 
2228         DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2229             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2230             SYMLINK3res *, resp);
2231 
2232         if (dvp != NULL)
2233                 VN_RELE(dvp);
2234 }
2235 
/*
 * Return the address of the directory file handle (where.dir) embedded in
 * the SYMLINK3 arguments.
 */
2236 void *
2237 rfs3_symlink_getfh(SYMLINK3args *args)
2238 {
2239 
2240         return (&args->where.dir);
2241 }
2242 
/*
 * NFSv3 MKNOD: create the special file args->where.name in the directory
 * named by the file handle args->where.dir.  The outcome is stored in *resp.
 *
 * The requested type selects which attribute set is converted:
 *   NF3CHR, NF3BLK   - device attributes; the caller must pass
 *                      secpolicy_sys_devices() (else NFS3ERR_PERM) and the
 *                      device number is built from specdata1/specdata2
 *   NF3SOCK, NF3FIFO - pipe attributes
 *   anything else    - NFS3ERR_BADTYPE
 * A mode must be present in the converted attributes (NFS3ERR_INVAL).
 *
 * The node is created with VOP_CREATE() using EXCL and an open mode of 0.
 * On success resok carries the file handle (handle_follows is FALSE when
 * makefh3() fails), the object's post-op attributes and the parent's wcc
 * data; on failure resfail.dir_wcc carries the parent's wcc data.
 *
 * Exit labels:
 *   out  - resp->status is derived from error, or is NFS3ERR_JUKEBOX when
 *          T_WOULDBLOCK is set on the current thread (the flag is cleared)
 *   out1 - resp->status was already set by the caller of the goto
 *
 * NOTE(review): out1 fires the op__mknod__done probe before
 * resfail.dir_wcc is filled in, whereas rfs3_symlink() and rfs3_remove()
 * fill it in first -- confirm whether the ordering here is intended.
 */
2243 void
2244 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2245     struct svc_req *req, cred_t *cr, bool_t ro)
2246 {
2247         int error;
2248         vnode_t *vp;
2249         vnode_t *realvp;
2250         vnode_t *dvp;
2251         struct vattr *vap;
2252         struct vattr va;
2253         struct vattr *dbvap;
2254         struct vattr dbva;
2255         struct vattr *davap;
2256         struct vattr dava;
2257         int mode;
2258         enum vcexcl excl;
2259         struct sockaddr *ca;
2260         char *name = NULL;
2261 
2262         dbvap = NULL;
2263         davap = NULL;
2264 
2265         dvp = nfs3_fhtovp(&args->where.dir, exi);
2266 
2267         DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2268             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2269             MKNOD3args *, args);
2270 
2271         if (dvp == NULL) {
2272                 error = ESTALE;
2273                 goto out;
2274         }
2275 
2276         dbva.va_mask = AT_ALL;
2277         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2278         davap = dbvap;  /* until re-read after VOP_CREATE */
2279 
2280         if (args->where.name == nfs3nametoolong) {
2281                 resp->status = NFS3ERR_NAMETOOLONG;
2282                 goto out1;
2283         }
2284 
2285         if (args->where.name == NULL || *(args->where.name) == '\0') {
2286                 resp->status = NFS3ERR_ACCES;
2287                 goto out1;
2288         }
2289 
2290         if (rdonly(ro, dvp)) {
2291                 resp->status = NFS3ERR_ROFS;
2292                 goto out1;
2293         }
2294 
2295         if (is_system_labeled()) {
2296                 bslabel_t *clabel = req->rq_label;
2297 
2298                 ASSERT(clabel != NULL);
2299                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2300                     "got client label from request(1)", struct svc_req *, req);
2301 
2302                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2303                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2304                             exi)) {
2305                                 resp->status = NFS3ERR_ACCES;
2306                                 goto out1;
2307                         }
2308                 }
2309         }
2310 
2311         switch (args->what.type) {
2312         case NF3CHR:
2313         case NF3BLK:
2314                 error = sattr3_to_vattr(
2315                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2316                 if (error)
2317                         goto out;
2318                 if (secpolicy_sys_devices(cr) != 0) {
2319                         resp->status = NFS3ERR_PERM;
2320                         goto out1;
2321                 }
2322                 if (args->what.type == NF3CHR)
2323                         va.va_type = VCHR;
2324                 else
2325                         va.va_type = VBLK;
2326                 va.va_rdev = makedevice(
2327                     args->what.mknoddata3_u.device.spec.specdata1,
2328                     args->what.mknoddata3_u.device.spec.specdata2);
2329                 va.va_mask |= AT_TYPE | AT_RDEV;
2330                 break;
2331         case NF3SOCK:
2332                 error = sattr3_to_vattr(
2333                     &args->what.mknoddata3_u.pipe_attributes, &va);
2334                 if (error)
2335                         goto out;
2336                 va.va_type = VSOCK;
2337                 va.va_mask |= AT_TYPE;
2338                 break;
2339         case NF3FIFO:
2340                 error = sattr3_to_vattr(
2341                     &args->what.mknoddata3_u.pipe_attributes, &va);
2342                 if (error)
2343                         goto out;
2344                 va.va_type = VFIFO;
2345                 va.va_mask |= AT_TYPE;
2346                 break;
2347         default:
2348                 resp->status = NFS3ERR_BADTYPE;
2349                 goto out1;
2350         }
2351 
2352         /*
2353          * Must specify the mode.
2354          */
2355         if (!(va.va_mask & AT_MODE)) {
2356                 resp->status = NFS3ERR_INVAL;
2357                 goto out1;
2358         }
2359 
2360         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2361         name = nfscmd_convname(ca, exi, args->where.name,
2362             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2363 
2364         if (name == NULL) {
2365                 resp->status = NFS3ERR_INVAL;
2366                 goto out1;
2367         }
2368 
2369         excl = EXCL;
2370 
2371         mode = 0;
2372 
2373         error = VOP_CREATE(dvp, name, &va, excl, mode,
2374             &vp, cr, 0, NULL, NULL);
2375 
2376         if (name != args->where.name)
2377                 kmem_free(name, MAXPATHLEN + 1);
2378 
2379         dava.va_mask = AT_ALL;
2380         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2381 
2382         /*
2383          * Force modified data and metadata out to stable storage.
2384          */
2385         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2386 
2387         if (error)
2388                 goto out;
2389 
2390         resp->status = NFS3_OK;
2391 
2392         error = makefh3(&resp->resok.obj.handle, vp, exi);
2393         if (error)
2394                 resp->resok.obj.handle_follows = FALSE;
2395         else
2396                 resp->resok.obj.handle_follows = TRUE;
2397 
2398         va.va_mask = AT_ALL;
2399         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2400 
2401         /*
2402          * Force modified metadata out to stable storage.
2403          *
2404          * if a underlying vp exists, pass it to VOP_FSYNC
2405          */
2406         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2407                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2408         else
2409                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2410 
2411         VN_RELE(vp);
2412 
2413         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2414         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2415         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2416             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2417             MKNOD3res *, resp);
2418         VN_RELE(dvp);
2419         return;
2420 
2421 out:
2422         if (curthread->t_flag & T_WOULDBLOCK) {
2423                 curthread->t_flag &= ~T_WOULDBLOCK;
2424                 resp->status = NFS3ERR_JUKEBOX;
2425         } else
2426                 resp->status = puterrno3(error);
2427 out1:
2428         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2429             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2430             MKNOD3res *, resp);
2431         if (dvp != NULL)
2432                 VN_RELE(dvp);
2433         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2434 }
2435 
/*
 * Return the address of the directory file handle (where.dir) embedded in
 * the MKNOD3 arguments.
 */
2436 void *
2437 rfs3_mknod_getfh(MKNOD3args *args)
2438 {
2439 
2440         return (&args->where.dir);
2441 }
2442 
/*
 * NFSv3 REMOVE: remove the entry args->object.name from the directory
 * named by the file handle args->object.dir.  The outcome is stored in
 * *resp: resok.dir_wcc on success, resfail.dir_wcc on failure.
 *
 * The target is looked up before it is removed so that it can be checked
 * for an NFSv4 delegation (a conflict is answered with NFS3ERR_JUKEBOX)
 * and, when nbl_need_check() asks for it, for a conflicting non-blocking
 * mandatory share reservation (EACCES) inside an nbl critical region.
 * The hold that VOP_LOOKUP() returns on the target is dropped on every
 * path that leaves the function after the lookup succeeded.
 *
 * Exit labels:
 *   err  - resp->status is derived from error, or is NFS3ERR_JUKEBOX when
 *          T_WOULDBLOCK is set on the current thread (the flag is cleared)
 *   err1 - resp->status was already set; fills in resfail.dir_wcc
 *   out  - common cleanup: fires the done probe, frees the converted name
 *          and releases the directory vnode
 */
2443 void
2444 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2445     struct svc_req *req, cred_t *cr, bool_t ro)
2446 {
2447         int error = 0;
2448         vnode_t *vp;
2449         struct vattr *bvap;
2450         struct vattr bva;
2451         struct vattr *avap;
2452         struct vattr ava;
2453         vnode_t *targvp = NULL;
2454         struct sockaddr *ca;
2455         char *name = NULL;
2456 
2457         bvap = NULL;
2458         avap = NULL;
2459 
2460         vp = nfs3_fhtovp(&args->object.dir, exi);
2461 
2462         DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2463             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2464             REMOVE3args *, args);
2465 
2466         if (vp == NULL) {
2467                 error = ESTALE;
2468                 goto err;
2469         }
2470 
2471         bva.va_mask = AT_ALL;
2472         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2473         avap = bvap;    /* until re-read after the remove */
2474 
2475         if (vp->v_type != VDIR) {
2476                 resp->status = NFS3ERR_NOTDIR;
2477                 goto err1;
2478         }
2479 
2480         if (args->object.name == nfs3nametoolong) {
2481                 resp->status = NFS3ERR_NAMETOOLONG;
2482                 goto err1;
2483         }
2484 
2485         if (args->object.name == NULL || *(args->object.name) == '\0') {
2486                 resp->status = NFS3ERR_ACCES;
2487                 goto err1;
2488         }
2489 
2490         if (rdonly(ro, vp)) {
2491                 resp->status = NFS3ERR_ROFS;
2492                 goto err1;
2493         }
2494 
2495         if (is_system_labeled()) {
2496                 bslabel_t *clabel = req->rq_label;
2497 
2498                 ASSERT(clabel != NULL);
2499                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2500                     "got client label from request(1)", struct svc_req *, req);
2501 
2502                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2503                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2504                             exi)) {
2505                                 resp->status = NFS3ERR_ACCES;
2506                                 goto err1;
2507                         }
2508                 }
2509         }
2510 
2511         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2512         name = nfscmd_convname(ca, exi, args->object.name,
2513             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2514 
2515         if (name == NULL) {
2516                 resp->status = NFS3ERR_INVAL;
2517                 goto err1;
2518         }
2519 
2520         /*
2521          * Check for a conflict with a non-blocking mandatory share
2522          * reservation and V4 delegations
2523          */
2524         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2525             NULL, cr, NULL, NULL, NULL);
2526         if (error != 0)
2527                 goto err;
2528 
2529         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2530                 VN_RELE(targvp);        /* drop the VOP_LOOKUP hold */
2531                 resp->status = NFS3ERR_JUKEBOX;
2532                 goto err1;
2533         }
2534         if (!nbl_need_check(targvp)) {
2535                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2536         } else {
2537                 nbl_start_crit(targvp, RW_READER);
2538                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2539                         error = EACCES;
2540                 } else {
2541                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2542                 }
2543                 nbl_end_crit(targvp);
2544         }
2545         VN_RELE(targvp);
2546         targvp = NULL;
2547 
2548         ava.va_mask = AT_ALL;
2549         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2550 
2551         /*
2552          * Force modified data and metadata out to stable storage.
2553          */
2554         (void) VOP_FSYNC(vp, 0, cr, NULL);
2555 
2556         if (error)
2557                 goto err;
2558 
2559         resp->status = NFS3_OK;
2560         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2561         goto out;
2562 
2563 err:
2564         if (curthread->t_flag & T_WOULDBLOCK) {
2565                 curthread->t_flag &= ~T_WOULDBLOCK;
2566                 resp->status = NFS3ERR_JUKEBOX;
2567         } else
2568                 resp->status = puterrno3(error);
2569 err1:
2570         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2571 out:
2572         DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2573             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2574             REMOVE3res *, resp);
2575 
2576         if (name != NULL && name != args->object.name)
2577                 kmem_free(name, MAXPATHLEN + 1);
2578 
2579         if (vp != NULL)
2580                 VN_RELE(vp);
2581 }
2582 
/*
 * Return the address of the directory file handle (object.dir) embedded in
 * the REMOVE3 arguments.
 */
2583 void *
2584 rfs3_remove_getfh(REMOVE3args *args)
2585 {
2586 
2587         return (&args->object.dir);
2588 }
2589 
2590 void
2591 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2592     struct svc_req *req, cred_t *cr, bool_t ro)
2593 {
2594         int error;
2595         vnode_t *vp;
2596         struct vattr *bvap;
2597         struct vattr bva;
2598         struct vattr *avap;
2599         struct vattr ava;
2600         struct sockaddr *ca;
2601         char *name = NULL;
2602 
2603         bvap = NULL;
2604         avap = NULL;
2605 
2606         vp = nfs3_fhtovp(&args->object.dir, exi);
2607 
2608         DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2609             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2610             RMDIR3args *, args);
2611 
2612         if (vp == NULL) {
2613                 error = ESTALE;
2614                 goto err;
2615         }
2616 
2617         bva.va_mask = AT_ALL;
2618         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2619         avap = bvap;
2620 
2621         if (vp->v_type != VDIR) {
2622                 resp->status = NFS3ERR_NOTDIR;
2623                 goto err1;
2624         }
2625 
2626         if (args->object.name == nfs3nametoolong) {
2627                 resp->status = NFS3ERR_NAMETOOLONG;
2628                 goto err1;
2629         }
2630 
2631         if (args->object.name == NULL || *(args->object.name) == '\0') {
2632                 resp->status = NFS3ERR_ACCES;
2633                 goto err1;
2634         }
2635 
2636         if (rdonly(ro, vp)) {
2637                 resp->status = NFS3ERR_ROFS;
2638                 goto err1;
2639         }
2640 
2641         if (is_system_labeled()) {
2642                 bslabel_t *clabel = req->rq_label;
2643 
2644                 ASSERT(clabel != NULL);
2645                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2646                     "got client label from request(1)", struct svc_req *, req);
2647 
2648                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2649                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2650                             exi)) {
2651                                 resp->status = NFS3ERR_ACCES;
2652                                 goto err1;
2653                         }
2654                 }
2655         }
2656 
2657         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2658         name = nfscmd_convname(ca, exi, args->object.name,
2659             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2660 
2661         if (name == NULL) {
2662                 resp->status = NFS3ERR_INVAL;
2663                 goto err1;
2664         }
2665 
2666         error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2667 
2668         if (name != args->object.name)
2669                 kmem_free(name, MAXPATHLEN + 1);
2670 
2671         ava.va_mask = AT_ALL;
2672         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2673 
2674         /*
2675          * Force modified data and metadata out to stable storage.
2676          */
2677         (void) VOP_FSYNC(vp, 0, cr, NULL);
2678 
2679         if (error) {
2680                 /*
2681                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2682                  * if the directory is not empty.  A System V NFS server
2683                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2684                  * over the wire.
2685                  */
2686                 if (error == EEXIST)
2687                         error = ENOTEMPTY;
2688                 goto err;
2689         }
2690 
2691         resp->status = NFS3_OK;
2692         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2693         goto out;
2694 
2695 err:
2696         if (curthread->t_flag & T_WOULDBLOCK) {
2697                 curthread->t_flag &= ~T_WOULDBLOCK;
2698                 resp->status = NFS3ERR_JUKEBOX;
2699         } else
2700                 resp->status = puterrno3(error);
2701 err1:
2702         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2703 out:
2704         DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2705             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2706             RMDIR3res *, resp);
2707         if (vp != NULL)
2708                 VN_RELE(vp);
2709 
2710 }
2711 
2712 void *
2713 rfs3_rmdir_getfh(RMDIR3args *args)
2714 {
2715 
2716         return (&args->object.dir);
2717 }
2718 
2719 void
2720 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2721     struct svc_req *req, cred_t *cr, bool_t ro)
2722 {
2723         int error = 0;
2724         vnode_t *fvp;
2725         vnode_t *tvp;
2726         vnode_t *targvp;
2727         struct vattr *fbvap;
2728         struct vattr fbva;
2729         struct vattr *favap;
2730         struct vattr fava;
2731         struct vattr *tbvap;
2732         struct vattr tbva;
2733         struct vattr *tavap;
2734         struct vattr tava;
2735         nfs_fh3 *fh3;
2736         struct exportinfo *to_exi;
2737         vnode_t *srcvp = NULL;
2738         bslabel_t *clabel;
2739         struct sockaddr *ca;
2740         char *name = NULL;
2741         char *toname = NULL;
2742 
2743         fbvap = NULL;
2744         favap = NULL;
2745         tbvap = NULL;
2746         tavap = NULL;
2747         tvp = NULL;
2748 
2749         fvp = nfs3_fhtovp(&args->from.dir, exi);
2750 
2751         DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2752             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2753             RENAME3args *, args);
2754 
2755         if (fvp == NULL) {
2756                 error = ESTALE;
2757                 goto err;
2758         }
2759 
2760         if (is_system_labeled()) {
2761                 clabel = req->rq_label;
2762                 ASSERT(clabel != NULL);
2763                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2764                     "got client label from request(1)", struct svc_req *, req);
2765 
2766                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2767                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2768                             exi)) {
2769                                 resp->status = NFS3ERR_ACCES;
2770                                 goto err1;
2771                         }
2772                 }
2773         }
2774 
2775         fbva.va_mask = AT_ALL;
2776         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2777         favap = fbvap;
2778 
2779         fh3 = &args->to.dir;
2780         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2781         if (to_exi == NULL) {
2782                 resp->status = NFS3ERR_ACCES;
2783                 goto err1;
2784         }
2785         exi_rele(to_exi);
2786 
2787         if (to_exi != exi) {
2788                 resp->status = NFS3ERR_XDEV;
2789                 goto err1;
2790         }
2791 
2792         tvp = nfs3_fhtovp(&args->to.dir, exi);
2793         if (tvp == NULL) {
2794                 error = ESTALE;
2795                 goto err;
2796         }
2797 
2798         tbva.va_mask = AT_ALL;
2799         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2800         tavap = tbvap;
2801 
2802         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2803                 resp->status = NFS3ERR_NOTDIR;
2804                 goto err1;
2805         }
2806 
2807         if (args->from.name == nfs3nametoolong ||
2808             args->to.name == nfs3nametoolong) {
2809                 resp->status = NFS3ERR_NAMETOOLONG;
2810                 goto err1;
2811         }
2812         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2813             args->to.name == NULL || *(args->to.name) == '\0') {
2814                 resp->status = NFS3ERR_ACCES;
2815                 goto err1;
2816         }
2817 
2818         if (rdonly(ro, tvp)) {
2819                 resp->status = NFS3ERR_ROFS;
2820                 goto err1;
2821         }
2822 
2823         if (is_system_labeled()) {
2824                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2825                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2826                             exi)) {
2827                                 resp->status = NFS3ERR_ACCES;
2828                                 goto err1;
2829                         }
2830                 }
2831         }
2832 
2833         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2834         name = nfscmd_convname(ca, exi, args->from.name,
2835             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2836 
2837         if (name == NULL) {
2838                 resp->status = NFS3ERR_INVAL;
2839                 goto err1;
2840         }
2841 
2842         toname = nfscmd_convname(ca, exi, args->to.name,
2843             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2844 
2845         if (toname == NULL) {
2846                 resp->status = NFS3ERR_INVAL;
2847                 goto err1;
2848         }
2849 
2850         /*
2851          * Check for a conflict with a non-blocking mandatory share
2852          * reservation or V4 delegations.
2853          */
2854         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2855             NULL, cr, NULL, NULL, NULL);
2856         if (error != 0)
2857                 goto err;
2858 
2859         /*
2860          * If we rename a delegated file we should recall the
2861          * delegation, since future opens should fail or would
2862          * refer to a new file.
2863          */
2864         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2865                 resp->status = NFS3ERR_JUKEBOX;
2866                 goto err1;
2867         }
2868 
2869         /*
2870          * Check for renaming over a delegated file.  Check nfs4_deleg_policy
2871          * first to avoid VOP_LOOKUP if possible.
2872          */
2873         if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2874             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2875             NULL, NULL, NULL) == 0) {
2876 
2877                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2878                         VN_RELE(targvp);
2879                         resp->status = NFS3ERR_JUKEBOX;
2880                         goto err1;
2881                 }
2882                 VN_RELE(targvp);
2883         }
2884 
2885         if (!nbl_need_check(srcvp)) {
2886                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2887         } else {
2888                 nbl_start_crit(srcvp, RW_READER);
2889                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2890                         error = EACCES;
2891                 else
2892                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2893                 nbl_end_crit(srcvp);
2894         }
2895         if (error == 0)
2896                 vn_renamepath(tvp, srcvp, args->to.name,
2897                     strlen(args->to.name));
2898         VN_RELE(srcvp);
2899         srcvp = NULL;
2900 
2901         fava.va_mask = AT_ALL;
2902         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2903         tava.va_mask = AT_ALL;
2904         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2905 
2906         /*
2907          * Force modified data and metadata out to stable storage.
2908          */
2909         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2910         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2911 
2912         if (error)
2913                 goto err;
2914 
2915         resp->status = NFS3_OK;
2916         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2917         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2918         goto out;
2919 
2920 err:
2921         if (curthread->t_flag & T_WOULDBLOCK) {
2922                 curthread->t_flag &= ~T_WOULDBLOCK;
2923                 resp->status = NFS3ERR_JUKEBOX;
2924         } else {
2925                 resp->status = puterrno3(error);
2926         }
2927 err1:
2928         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2929         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2930 
2931 out:
2932         if (name != NULL && name != args->from.name)
2933                 kmem_free(name, MAXPATHLEN + 1);
2934         if (toname != NULL && toname != args->to.name)
2935                 kmem_free(toname, MAXPATHLEN + 1);
2936 
2937         DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2938             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2939             RENAME3res *, resp);
2940         if (fvp != NULL)
2941                 VN_RELE(fvp);
2942         if (tvp != NULL)
2943                 VN_RELE(tvp);
2944 }
2945 
2946 void *
2947 rfs3_rename_getfh(RENAME3args *args)
2948 {
2949 
2950         return (&args->from.dir);
2951 }
2952 
2953 void
2954 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2955     struct svc_req *req, cred_t *cr, bool_t ro)
2956 {
2957         int error;
2958         vnode_t *vp;
2959         vnode_t *dvp;
2960         struct vattr *vap;
2961         struct vattr va;
2962         struct vattr *bvap;
2963         struct vattr bva;
2964         struct vattr *avap;
2965         struct vattr ava;
2966         nfs_fh3 *fh3;
2967         struct exportinfo *to_exi;
2968         bslabel_t *clabel;
2969         struct sockaddr *ca;
2970         char *name = NULL;
2971 
2972         vap = NULL;
2973         bvap = NULL;
2974         avap = NULL;
2975         dvp = NULL;
2976 
2977         vp = nfs3_fhtovp(&args->file, exi);
2978 
2979         DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2980             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2981             LINK3args *, args);
2982 
2983         if (vp == NULL) {
2984                 error = ESTALE;
2985                 goto out;
2986         }
2987 
2988         va.va_mask = AT_ALL;
2989         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2990 
2991         fh3 = &args->link.dir;
2992         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2993         if (to_exi == NULL) {
2994                 resp->status = NFS3ERR_ACCES;
2995                 goto out1;
2996         }
2997         exi_rele(to_exi);
2998 
2999         if (to_exi != exi) {
3000                 resp->status = NFS3ERR_XDEV;
3001                 goto out1;
3002         }
3003 
3004         if (is_system_labeled()) {
3005                 clabel = req->rq_label;
3006 
3007                 ASSERT(clabel != NULL);
3008                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3009                     "got client label from request(1)", struct svc_req *, req);
3010 
3011                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3012                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3013                             exi)) {
3014                                 resp->status = NFS3ERR_ACCES;
3015                                 goto out1;
3016                         }
3017                 }
3018         }
3019 
3020         dvp = nfs3_fhtovp(&args->link.dir, exi);
3021         if (dvp == NULL) {
3022                 error = ESTALE;
3023                 goto out;
3024         }
3025 
3026         bva.va_mask = AT_ALL;
3027         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3028 
3029         if (dvp->v_type != VDIR) {
3030                 resp->status = NFS3ERR_NOTDIR;
3031                 goto out1;
3032         }
3033 
3034         if (args->link.name == nfs3nametoolong) {
3035                 resp->status = NFS3ERR_NAMETOOLONG;
3036                 goto out1;
3037         }
3038 
3039         if (args->link.name == NULL || *(args->link.name) == '\0') {
3040                 resp->status = NFS3ERR_ACCES;
3041                 goto out1;
3042         }
3043 
3044         if (rdonly(ro, dvp)) {
3045                 resp->status = NFS3ERR_ROFS;
3046                 goto out1;
3047         }
3048 
3049         if (is_system_labeled()) {
3050                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3051                     "got client label from request(1)", struct svc_req *, req);
3052 
3053                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3054                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3055                             exi)) {
3056                                 resp->status = NFS3ERR_ACCES;
3057                                 goto out1;
3058                         }
3059                 }
3060         }
3061 
3062         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3063         name = nfscmd_convname(ca, exi, args->link.name,
3064             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3065 
3066         if (name == NULL) {
3067                 resp->status = NFS3ERR_SERVERFAULT;
3068                 goto out1;
3069         }
3070 
3071         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3072 
3073         va.va_mask = AT_ALL;
3074         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3075         ava.va_mask = AT_ALL;
3076         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3077 
3078         /*
3079          * Force modified data and metadata out to stable storage.
3080          */
3081         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3082         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3083 
3084         if (error)
3085                 goto out;
3086 
3087         VN_RELE(dvp);
3088 
3089         resp->status = NFS3_OK;
3090         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3091         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3092 
3093         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3094             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3095             LINK3res *, resp);
3096 
3097         VN_RELE(vp);
3098 
3099         return;
3100 
3101 out:
3102         if (curthread->t_flag & T_WOULDBLOCK) {
3103                 curthread->t_flag &= ~T_WOULDBLOCK;
3104                 resp->status = NFS3ERR_JUKEBOX;
3105         } else
3106                 resp->status = puterrno3(error);
3107 out1:
3108         if (name != NULL && name != args->link.name)
3109                 kmem_free(name, MAXPATHLEN + 1);
3110 
3111         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3112             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3113             LINK3res *, resp);
3114 
3115         if (vp != NULL)
3116                 VN_RELE(vp);
3117         if (dvp != NULL)
3118                 VN_RELE(dvp);
3119         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3120         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3121 }
3122 
3123 void *
3124 rfs3_link_getfh(LINK3args *args)
3125 {
3126 
3127         return (&args->file);
3128 }
3129 
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose name length is specified by
 * the macro parameter).  If the incoming request's count is at least this
 * large, then we are guaranteed to be able to return at least one
 * directory entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
 * is not, then we need to check to make sure that this error does not
 * need to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is comprised of the following:
 *
 * status - 1 * BYTES_PER_XDR_UNIT
 * attr. flag - 1 * BYTES_PER_XDR_UNIT
 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
 * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * end of list - 1 * BYTES_PER_XDR_UNIT
 * end of file - 1 * BYTES_PER_XDR_UNIT
 * length of the entry name, rounded up to a multiple of BYTES_PER_XDR_UNIT
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        ((1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT))
3158 
3159 /* ARGSUSED */
3160 void
3161 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3162     struct svc_req *req, cred_t *cr, bool_t ro)
3163 {
3164         int error;
3165         vnode_t *vp;
3166         struct vattr *vap;
3167         struct vattr va;
3168         struct iovec iov;
3169         struct uio uio;
3170         char *data;
3171         int iseof;
3172         int bufsize;
3173         int namlen;
3174         uint_t count;
3175         struct sockaddr *ca;
3176 
3177         vap = NULL;
3178 
3179         vp = nfs3_fhtovp(&args->dir, exi);
3180 
3181         DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3182             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3183             READDIR3args *, args);
3184 
3185         if (vp == NULL) {
3186                 error = ESTALE;
3187                 goto out;
3188         }
3189 
3190         if (is_system_labeled()) {
3191                 bslabel_t *clabel = req->rq_label;
3192 
3193                 ASSERT(clabel != NULL);
3194                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3195                     "got client label from request(1)", struct svc_req *, req);
3196 
3197                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3198                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3199                             exi)) {
3200                                 resp->status = NFS3ERR_ACCES;
3201                                 goto out1;
3202                         }
3203                 }
3204         }
3205 
3206         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3207 
3208         va.va_mask = AT_ALL;
3209         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3210 
3211         if (vp->v_type != VDIR) {
3212                 resp->status = NFS3ERR_NOTDIR;
3213                 goto out1;
3214         }
3215 
3216         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3217         if (error)
3218                 goto out;
3219 
3220         /*
3221          * Now don't allow arbitrary count to alloc;
3222          * allow the maximum not to exceed rfs3_tsize()
3223          */
3224         if (args->count > rfs3_tsize(req))
3225                 args->count = rfs3_tsize(req);
3226 
3227         /*
3228          * Make sure that there is room to read at least one entry
3229          * if any are available.
3230          */
3231         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3232                 count = DIRENT64_RECLEN(MAXNAMELEN);
3233         else
3234                 count = args->count;
3235 
3236         data = kmem_alloc(count, KM_SLEEP);
3237 
3238         iov.iov_base = data;
3239         iov.iov_len = count;
3240         uio.uio_iov = &iov;
3241         uio.uio_iovcnt = 1;
3242         uio.uio_segflg = UIO_SYSSPACE;
3243         uio.uio_extflg = UIO_COPY_CACHED;
3244         uio.uio_loffset = (offset_t)args->cookie;
3245         uio.uio_resid = count;
3246 
3247         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3248 
3249         va.va_mask = AT_ALL;
3250         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3251 
3252         if (error) {
3253                 kmem_free(data, count);
3254                 goto out;
3255         }
3256 
3257         /*
3258          * If the count was not large enough to be able to guarantee
3259          * to be able to return at least one entry, then need to
3260          * check to see if NFS3ERR_TOOSMALL should be returned.
3261          */
3262         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3263                 /*
3264                  * bufsize is used to keep track of the size of the response.
3265                  * It is primed with:
3266                  *      1 for the status +
3267                  *      1 for the dir_attributes.attributes boolean +
3268                  *      2 for the cookie verifier
3269                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3270                  * to bytes.  If there are directory attributes to be
3271                  * returned, then:
3272                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3273                  * time BYTES_PER_XDR_UNIT is added to account for them.
3274                  */
3275                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3276                 if (vap != NULL)
3277                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3278                 /*
3279                  * An entry is composed of:
3280                  *      1 for the true/false list indicator +
3281                  *      2 for the fileid +
3282                  *      1 for the length of the name +
3283                  *      2 for the cookie +
3284                  * all times BYTES_PER_XDR_UNIT to convert from
3285                  * XDR units to bytes, plus the length of the name
3286                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3287                  */
3288                 if (count != uio.uio_resid) {
3289                         namlen = strlen(((struct dirent64 *)data)->d_name);
3290                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3291                             roundup(namlen, BYTES_PER_XDR_UNIT);
3292                 }
3293                 /*
3294                  * We need to check to see if the number of bytes left
3295                  * to go into the buffer will actually fit into the
3296                  * buffer.  This is calculated as the size of this
3297                  * entry plus:
3298                  *      1 for the true/false list indicator +
3299                  *      1 for the eof indicator
3300                  * times BYTES_PER_XDR_UNIT to convert from from
3301                  * XDR units to bytes.
3302                  */
3303                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3304                 if (bufsize > args->count) {
3305                         kmem_free(data, count);
3306                         resp->status = NFS3ERR_TOOSMALL;
3307                         goto out1;
3308                 }
3309         }
3310 
3311         /*
3312          * Have a valid readir buffer for the native character
3313          * set. Need to check if a conversion is necessary and
3314          * potentially rewrite the whole buffer. Note that if the
3315          * conversion expands names enough, the structure may not
3316          * fit. In this case, we need to drop entries until if fits
3317          * and patch the counts in order that the next readdir will
3318          * get the correct entries.
3319          */
3320         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3321         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3322 
3323 
3324         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3325 
3326 #if 0 /* notyet */
3327         /*
3328          * Don't do this.  It causes local disk writes when just
3329          * reading the file and the overhead is deemed larger
3330          * than the benefit.
3331          */
3332         /*
3333          * Force modified metadata out to stable storage.
3334          */
3335         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3336 #endif
3337 
3338         resp->status = NFS3_OK;
3339         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3340         resp->resok.cookieverf = 0;
3341         resp->resok.reply.entries = (entry3 *)data;
3342         resp->resok.reply.eof = iseof;
3343         resp->resok.size = count - uio.uio_resid;
3344         resp->resok.count = args->count;
3345         resp->resok.freecount = count;
3346 
3347         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3348             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3349             READDIR3res *, resp);
3350 
3351         VN_RELE(vp);
3352 
3353         return;
3354 
3355 out:
3356         if (curthread->t_flag & T_WOULDBLOCK) {
3357                 curthread->t_flag &= ~T_WOULDBLOCK;
3358                 resp->status = NFS3ERR_JUKEBOX;
3359         } else
3360                 resp->status = puterrno3(error);
3361 out1:
3362         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3363 
3364         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3365             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3366             READDIR3res *, resp);
3367 
3368         if (vp != NULL) {
3369                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3370                 VN_RELE(vp);
3371         }
3372         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3373 }
3374 
3375 void *
3376 rfs3_readdir_getfh(READDIR3args *args)
3377 {
3378 
3379         return (&args->dir);
3380 }
3381 
3382 void
3383 rfs3_readdir_free(READDIR3res *resp)
3384 {
3385 
3386         if (resp->status == NFS3_OK)
3387                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3388 }
3389 
/*
 * nextdp(dp) yields a pointer to the dirent64 record located d_reclen
 * bytes after dp.  Any earlier definition of nextdp is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3394 
/*
 * This macro computes the size, in bytes, of one directory entry in a
 * READDIRPLUS response, including the attributes as well as the file
 * handle.  If the space remaining in the reply is larger than this, then
 * we are guaranteed to be able to return at least one more directory
 * entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * length of the entry name, rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The argument is parenthesized so that any expression may be passed.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + \
        NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3418 
/*
 * Initial value of the local rd_unit in rfs3_readdirplus().
 * NOTE(review): left non-const, presumably so it can be tuned -- confirm.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3420 
3421 /* ARGSUSED */
3422 void
3423 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3424     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3425 {
3426         int error;
3427         vnode_t *vp;
3428         struct vattr *vap;
3429         struct vattr va;
3430         struct iovec iov;
3431         struct uio uio;
3432         char *data;
3433         int iseof;
3434         struct dirent64 *dp;
3435         vnode_t *nvp;
3436         struct vattr *nvap;
3437         struct vattr nva;
3438         entryplus3_info *infop = NULL;
3439         int size = 0;
3440         int nents = 0;
3441         int bufsize = 0;
3442         int entrysize = 0;
3443         int tofit = 0;
3444         int rd_unit = rfs3_readdir_unit;
3445         int prev_len;
3446         int space_left;
3447         int i;
3448         uint_t *namlen = NULL;
3449         char *ndata = NULL;
3450         struct sockaddr *ca;
3451         size_t ret;
3452 
3453         vap = NULL;
3454 
3455         vp = nfs3_fhtovp(&args->dir, exi);
3456 
3457         DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3458             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3459             READDIRPLUS3args *, args);
3460 
3461         if (vp == NULL) {
3462                 error = ESTALE;
3463                 goto out;
3464         }
3465 
3466         if (is_system_labeled()) {
3467                 bslabel_t *clabel = req->rq_label;
3468 
3469                 ASSERT(clabel != NULL);
3470                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3471                     char *, "got client label from request(1)",
3472                     struct svc_req *, req);
3473 
3474                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3475                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3476                             exi)) {
3477                                 resp->status = NFS3ERR_ACCES;
3478                                 goto out1;
3479                         }
3480                 }
3481         }
3482 
3483         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3484 
3485         va.va_mask = AT_ALL;
3486         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3487 
3488         if (vp->v_type != VDIR) {
3489                 error = ENOTDIR;
3490                 goto out;
3491         }
3492 
3493         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3494         if (error)
3495                 goto out;
3496 
3497         /*
3498          * Don't allow arbitrary counts for allocation
3499          */
3500         if (args->maxcount > rfs3_tsize(req))
3501                 args->maxcount = rfs3_tsize(req);
3502 
3503         /*
3504          * Make sure that there is room to read at least one entry
3505          * if any are available
3506          */
3507         args->dircount = MIN(args->dircount, args->maxcount);
3508 
3509         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3510                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3511 
3512         /*
3513          * This allocation relies on a minimum directory entry
3514          * being roughly 24 bytes.  Therefore, the namlen array
3515          * will have enough space based on the maximum number of
3516          * entries to read.
3517          */
3518         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3519 
3520         space_left = args->dircount;
3521         data = kmem_alloc(args->dircount, KM_SLEEP);
3522         dp = (struct dirent64 *)data;
3523         uio.uio_iov = &iov;
3524         uio.uio_iovcnt = 1;
3525         uio.uio_segflg = UIO_SYSSPACE;
3526         uio.uio_extflg = UIO_COPY_CACHED;
3527         uio.uio_loffset = (offset_t)args->cookie;
3528 
3529         /*
3530          * bufsize is used to keep track of the size of the response as we
3531          * get post op attributes and filehandles for each entry.  This is
3532          * an optimization as the server may have read more entries than will
3533          * fit in the buffer specified by maxcount.  We stop calculating
3534          * post op attributes and filehandles once we have exceeded maxcount.
3535          * This will minimize the effect of truncation.
3536          *
3537          * It is primed with:
3538          *      1 for the status +
3539          *      1 for the dir_attributes.attributes boolean +
3540          *      2 for the cookie verifier
3541          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3542          * to bytes.  If there are directory attributes to be
3543          * returned, then:
3544          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3545          * time BYTES_PER_XDR_UNIT is added to account for them.
3546          */
3547         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3548         if (vap != NULL)
3549                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3550 
3551 getmoredents:
3552         /*
3553          * Here we make a check so that our read unit is not larger than
3554          * the space left in the buffer.
3555          */
3556         rd_unit = MIN(rd_unit, space_left);
3557         iov.iov_base = (char *)dp;
3558         iov.iov_len = rd_unit;
3559         uio.uio_resid = rd_unit;
3560         prev_len = rd_unit;
3561 
3562         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3563 
3564         if (error) {
3565                 kmem_free(data, args->dircount);
3566                 goto out;
3567         }
3568 
3569         if (uio.uio_resid == prev_len && !iseof) {
3570                 if (nents == 0) {
3571                         kmem_free(data, args->dircount);
3572                         resp->status = NFS3ERR_TOOSMALL;
3573                         goto out1;
3574                 }
3575 
3576                 /*
3577                  * We could not get any more entries, so get the attributes
3578                  * and filehandle for the entries already obtained.
3579                  */
3580                 goto good;
3581         }
3582 
3583         /*
3584          * We estimate the size of the response by assuming the
3585          * entry exists and attributes and filehandle are also valid
3586          */
3587         for (size = prev_len - uio.uio_resid;
3588             size > 0;
3589             size -= dp->d_reclen, dp = nextdp(dp)) {
3590 
3591                 if (dp->d_ino == 0) {
3592                         nents++;
3593                         continue;
3594                 }
3595 
3596                 namlen[nents] = strlen(dp->d_name);
3597                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3598 
3599                 /*
3600                  * We need to check to see if the number of bytes left
3601                  * to go into the buffer will actually fit into the
3602                  * buffer.  This is calculated as the size of this
3603                  * entry plus:
3604                  *      1 for the true/false list indicator +
3605                  *      1 for the eof indicator
3606                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3607                  * to bytes.
3608                  *
3609                  * Also check the dircount limit against the first entry read
3610                  *
3611                  */
3612                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3613                 if (bufsize + tofit > args->maxcount) {
3614                         /*
3615                          * We make a check here to see if this was the
3616                          * first entry being measured.  If so, then maxcount
3617                          * was too small to begin with and so we need to
3618                          * return with NFS3ERR_TOOSMALL.
3619                          */
3620                         if (nents == 0) {
3621                                 kmem_free(data, args->dircount);
3622                                 resp->status = NFS3ERR_TOOSMALL;
3623                                 goto out1;
3624                         }
3625                         iseof = FALSE;
3626                         goto good;
3627                 }
3628                 bufsize += entrysize;
3629                 nents++;
3630         }
3631 
3632         /*
3633          * If there is enough room to fit at least 1 more entry including
3634          * post op attributes and filehandle in the buffer AND that we haven't
3635          * exceeded dircount then go back and get some more.
3636          */
3637         if (!iseof &&
3638             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3639                 space_left -= (prev_len - uio.uio_resid);
3640                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3641                         goto getmoredents;
3642 
3643                 /* else, fall through */
3644         }
3645 good:
3646         va.va_mask = AT_ALL;
3647         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3648 
3649         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3650 
3651         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3652         resp->resok.infop = infop;
3653 
3654         dp = (struct dirent64 *)data;
3655         for (i = 0; i < nents; i++) {
3656 
3657                 if (dp->d_ino == 0) {
3658                         infop[i].attr.attributes = FALSE;
3659                         infop[i].fh.handle_follows = FALSE;
3660                         dp = nextdp(dp);
3661                         continue;
3662                 }
3663 
3664                 infop[i].namelen = namlen[i];
3665 
3666                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3667                     NULL, NULL, NULL);
3668                 if (error) {
3669                         infop[i].attr.attributes = FALSE;
3670                         infop[i].fh.handle_follows = FALSE;
3671                         dp = nextdp(dp);
3672                         continue;
3673                 }
3674 
3675                 nva.va_mask = AT_ALL;
3676                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3677 
3678                 /* Lie about the object type for a referral */
3679                 if (vn_is_nfs_reparse(nvp, cr))
3680                         nvap->va_type = VLNK;
3681 
3682                 if (vn_ismntpt(nvp)) {
3683                         infop[i].attr.attributes = FALSE;
3684                         infop[i].fh.handle_follows = FALSE;
3685                 } else {
3686                         vattr_to_post_op_attr(nvap, &infop[i].attr);
3687 
3688                         error = makefh3(&infop[i].fh.handle, nvp, exi);
3689                         if (!error)
3690                                 infop[i].fh.handle_follows = TRUE;
3691                         else
3692                                 infop[i].fh.handle_follows = FALSE;
3693                 }
3694 
3695                 VN_RELE(nvp);
3696                 dp = nextdp(dp);
3697         }
3698 
3699         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3700         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3701         if (ndata == NULL)
3702                 ndata = data;
3703 
3704         if (ret > 0) {
3705                 /*
3706                  * We had to drop one or more entries in order to fit
3707                  * during the character conversion.  We need to patch
3708                  * up the size and eof info.
3709                  */
3710                 if (iseof)
3711                         iseof = FALSE;
3712 
3713                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3714                     nents, ret);
3715         }
3716 
3717 
3718 #if 0 /* notyet */
3719         /*
3720          * Don't do this.  It causes local disk writes when just
3721          * reading the file and the overhead is deemed larger
3722          * than the benefit.
3723          */
3724         /*
3725          * Force modified metadata out to stable storage.
3726          */
3727         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3728 #endif
3729 
3730         kmem_free(namlen, args->dircount);
3731 
3732         resp->status = NFS3_OK;
3733         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3734         resp->resok.cookieverf = 0;
3735         resp->resok.reply.entries = (entryplus3 *)ndata;
3736         resp->resok.reply.eof = iseof;
3737         resp->resok.size = nents;
3738         resp->resok.count = args->dircount - ret;
3739         resp->resok.maxcount = args->maxcount;
3740 
3741         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3742             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3743             READDIRPLUS3res *, resp);
3744 
3745         VN_RELE(vp);
3746 
3747         return;
3748 
3749 out:
3750         if (curthread->t_flag & T_WOULDBLOCK) {
3751                 curthread->t_flag &= ~T_WOULDBLOCK;
3752                 resp->status = NFS3ERR_JUKEBOX;
3753         } else {
3754                 resp->status = puterrno3(error);
3755         }
3756 out1:
3757         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3758 
3759         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3760             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3761             READDIRPLUS3res *, resp);
3762 
3763         if (vp != NULL) {
3764                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3765                 VN_RELE(vp);
3766         }
3767 
3768         if (namlen != NULL)
3769                 kmem_free(namlen, args->dircount);
3770 
3771         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3772 }
3773 
3774 void *
3775 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3776 {
3777 
3778         return (&args->dir);
3779 }
3780 
3781 void
3782 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3783 {
3784 
3785         if (resp->status == NFS3_OK) {
3786                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3787                 kmem_free(resp->resok.infop,
3788                     resp->resok.size * sizeof (struct entryplus3_info));
3789         }
3790 }
3791 
3792 /* ARGSUSED */
3793 void
3794 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3795     struct svc_req *req, cred_t *cr, bool_t ro)
3796 {
3797         int error;
3798         vnode_t *vp;
3799         struct vattr *vap;
3800         struct vattr va;
3801         struct statvfs64 sb;
3802 
3803         vap = NULL;
3804 
3805         vp = nfs3_fhtovp(&args->fsroot, exi);
3806 
3807         DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3808             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3809             FSSTAT3args *, args);
3810 
3811         if (vp == NULL) {
3812                 error = ESTALE;
3813                 goto out;
3814         }
3815 
3816         if (is_system_labeled()) {
3817                 bslabel_t *clabel = req->rq_label;
3818 
3819                 ASSERT(clabel != NULL);
3820                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3821                     "got client label from request(1)", struct svc_req *, req);
3822 
3823                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3824                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3825                             exi)) {
3826                                 resp->status = NFS3ERR_ACCES;
3827                                 goto out1;
3828                         }
3829                 }
3830         }
3831 
3832         error = VFS_STATVFS(vp->v_vfsp, &sb);
3833 
3834         va.va_mask = AT_ALL;
3835         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3836 
3837         if (error)
3838                 goto out;
3839 
3840         resp->status = NFS3_OK;
3841         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3842         if (sb.f_blocks != (fsblkcnt64_t)-1)
3843                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3844         else
3845                 resp->resok.tbytes = (size3)sb.f_blocks;
3846         if (sb.f_bfree != (fsblkcnt64_t)-1)
3847                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3848         else
3849                 resp->resok.fbytes = (size3)sb.f_bfree;
3850         if (sb.f_bavail != (fsblkcnt64_t)-1)
3851                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3852         else
3853                 resp->resok.abytes = (size3)sb.f_bavail;
3854         resp->resok.tfiles = (size3)sb.f_files;
3855         resp->resok.ffiles = (size3)sb.f_ffree;
3856         resp->resok.afiles = (size3)sb.f_favail;
3857         resp->resok.invarsec = 0;
3858 
3859         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3860             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3861             FSSTAT3res *, resp);
3862         VN_RELE(vp);
3863 
3864         return;
3865 
3866 out:
3867         if (curthread->t_flag & T_WOULDBLOCK) {
3868                 curthread->t_flag &= ~T_WOULDBLOCK;
3869                 resp->status = NFS3ERR_JUKEBOX;
3870         } else
3871                 resp->status = puterrno3(error);
3872 out1:
3873         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3874             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3875             FSSTAT3res *, resp);
3876 
3877         if (vp != NULL)
3878                 VN_RELE(vp);
3879         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3880 }
3881 
3882 void *
3883 rfs3_fsstat_getfh(FSSTAT3args *args)
3884 {
3885 
3886         return (&args->fsroot);
3887 }
3888 
3889 /* ARGSUSED */
3890 void
3891 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3892     struct svc_req *req, cred_t *cr, bool_t ro)
3893 {
3894         vnode_t *vp;
3895         struct vattr *vap;
3896         struct vattr va;
3897         uint32_t xfer_size;
3898         ulong_t l = 0;
3899         int error;
3900 
3901         vp = nfs3_fhtovp(&args->fsroot, exi);
3902 
3903         DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3904             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3905             FSINFO3args *, args);
3906 
3907         if (vp == NULL) {
3908                 if (curthread->t_flag & T_WOULDBLOCK) {
3909                         curthread->t_flag &= ~T_WOULDBLOCK;
3910                         resp->status = NFS3ERR_JUKEBOX;
3911                 } else
3912                         resp->status = NFS3ERR_STALE;
3913                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3914                 goto out;
3915         }
3916 
3917         if (is_system_labeled()) {
3918                 bslabel_t *clabel = req->rq_label;
3919 
3920                 ASSERT(clabel != NULL);
3921                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3922                     "got client label from request(1)", struct svc_req *, req);
3923 
3924                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3925                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3926                             exi)) {
3927                                 resp->status = NFS3ERR_STALE;
3928                                 vattr_to_post_op_attr(NULL,
3929                                     &resp->resfail.obj_attributes);
3930                                 goto out;
3931                         }
3932                 }
3933         }
3934 
3935         va.va_mask = AT_ALL;
3936         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3937 
3938         resp->status = NFS3_OK;
3939         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3940         xfer_size = rfs3_tsize(req);
3941         resp->resok.rtmax = xfer_size;
3942         resp->resok.rtpref = xfer_size;
3943         resp->resok.rtmult = DEV_BSIZE;
3944         resp->resok.wtmax = xfer_size;
3945         resp->resok.wtpref = xfer_size;
3946         resp->resok.wtmult = DEV_BSIZE;
3947         resp->resok.dtpref = MAXBSIZE;
3948 
3949         /*
3950          * Large file spec: want maxfilesize based on limit of
3951          * underlying filesystem.  We can guess 2^31-1 if need be.
3952          */
3953         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3954         if (error) {
3955                 resp->status = puterrno3(error);
3956                 goto out;
3957         }
3958 
3959         /*
3960          * If the underlying file system does not support _PC_FILESIZEBITS,
3961          * return a reasonable default. Note that error code on VOP_PATHCONF
3962          * will be 0, even if the underlying file system does not support
3963          * _PC_FILESIZEBITS.
3964          */
3965         if (l == (ulong_t)-1) {
3966                 resp->resok.maxfilesize = MAXOFF32_T;
3967         } else {
3968                 if (l >= (sizeof (uint64_t) * 8))
3969                         resp->resok.maxfilesize = INT64_MAX;
3970                 else
3971                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3972         }
3973 
3974         resp->resok.time_delta.seconds = 0;
3975         resp->resok.time_delta.nseconds = 1000;
3976         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3977             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3978 
3979         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3980             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3981             FSINFO3res *, resp);
3982 
3983         VN_RELE(vp);
3984 
3985         return;
3986 
3987 out:
3988         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3989             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
3990             FSINFO3res *, resp);
3991         if (vp != NULL)
3992                 VN_RELE(vp);
3993 }
3994 
3995 void *
3996 rfs3_fsinfo_getfh(FSINFO3args *args)
3997 {
3998         return (&args->fsroot);
3999 }
4000 
4001 /* ARGSUSED */
4002 void
4003 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4004     struct svc_req *req, cred_t *cr, bool_t ro)
4005 {
4006         int error;
4007         vnode_t *vp;
4008         struct vattr *vap;
4009         struct vattr va;
4010         ulong_t val;
4011 
4012         vap = NULL;
4013 
4014         vp = nfs3_fhtovp(&args->object, exi);
4015 
4016         DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4017             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4018             PATHCONF3args *, args);
4019 
4020         if (vp == NULL) {
4021                 error = ESTALE;
4022                 goto out;
4023         }
4024 
4025         if (is_system_labeled()) {
4026                 bslabel_t *clabel = req->rq_label;
4027 
4028                 ASSERT(clabel != NULL);
4029                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4030                     "got client label from request(1)", struct svc_req *, req);
4031 
4032                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4033                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4034                             exi)) {
4035                                 resp->status = NFS3ERR_ACCES;
4036                                 goto out1;
4037                         }
4038                 }
4039         }
4040 
4041         va.va_mask = AT_ALL;
4042         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4043 
4044         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4045         if (error)
4046                 goto out;
4047         resp->resok.info.link_max = (uint32)val;
4048 
4049         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4050         if (error)
4051                 goto out;
4052         resp->resok.info.name_max = (uint32)val;
4053 
4054         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4055         if (error)
4056                 goto out;
4057         if (val == 1)
4058                 resp->resok.info.no_trunc = TRUE;
4059         else
4060                 resp->resok.info.no_trunc = FALSE;
4061 
4062         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4063         if (error)
4064                 goto out;
4065         if (val == 1)
4066                 resp->resok.info.chown_restricted = TRUE;
4067         else
4068                 resp->resok.info.chown_restricted = FALSE;
4069 
4070         resp->status = NFS3_OK;
4071         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4072         resp->resok.info.case_insensitive = FALSE;
4073         resp->resok.info.case_preserving = TRUE;
4074         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4075             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4076             PATHCONF3res *, resp);
4077         VN_RELE(vp);
4078         return;
4079 
4080 out:
4081         if (curthread->t_flag & T_WOULDBLOCK) {
4082                 curthread->t_flag &= ~T_WOULDBLOCK;
4083                 resp->status = NFS3ERR_JUKEBOX;
4084         } else
4085                 resp->status = puterrno3(error);
4086 out1:
4087         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4088             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4089             PATHCONF3res *, resp);
4090         if (vp != NULL)
4091                 VN_RELE(vp);
4092         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4093 }
4094 
4095 void *
4096 rfs3_pathconf_getfh(PATHCONF3args *args)
4097 {
4098 
4099         return (&args->object);
4100 }
4101 
4102 void
4103 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4104     struct svc_req *req, cred_t *cr, bool_t ro)
4105 {
4106         nfs3_srv_t *ns;
4107         int error;
4108         vnode_t *vp;
4109         struct vattr *bvap;
4110         struct vattr bva;
4111         struct vattr *avap;
4112         struct vattr ava;
4113 
4114         bvap = NULL;
4115         avap = NULL;
4116 
4117         vp = nfs3_fhtovp(&args->file, exi);
4118 
4119         DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4120             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4121             COMMIT3args *, args);
4122 
4123         if (vp == NULL) {
4124                 error = ESTALE;
4125                 goto out;
4126         }
4127 
4128         ns = zone_getspecific(rfs3_zone_key, curzone);
4129         bva.va_mask = AT_ALL;
4130         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4131 
4132         /*
4133          * If we can't get the attributes, then we can't do the
4134          * right access checking.  So, we'll fail the request.
4135          */
4136         if (error)
4137                 goto out;
4138 
4139         bvap = &bva;
4140 
4141         if (rdonly(ro, vp)) {
4142                 resp->status = NFS3ERR_ROFS;
4143                 goto out1;
4144         }
4145 
4146         if (vp->v_type != VREG) {
4147                 resp->status = NFS3ERR_INVAL;
4148                 goto out1;
4149         }
4150 
4151         if (is_system_labeled()) {
4152                 bslabel_t *clabel = req->rq_label;
4153 
4154                 ASSERT(clabel != NULL);
4155                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4156                     "got client label from request(1)", struct svc_req *, req);
4157 
4158                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4159                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4160                             exi)) {
4161                                 resp->status = NFS3ERR_ACCES;
4162                                 goto out1;
4163                         }
4164                 }
4165         }
4166 
4167         if (crgetuid(cr) != bva.va_uid &&
4168             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4169                 goto out;
4170 
4171         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4172 
4173         ava.va_mask = AT_ALL;
4174         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4175 
4176         if (error)
4177                 goto out;
4178 
4179         resp->status = NFS3_OK;
4180         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4181         resp->resok.verf = ns->write3verf;
4182 
4183         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4184             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4185             COMMIT3res *, resp);
4186 
4187         VN_RELE(vp);
4188 
4189         return;
4190 
4191 out:
4192         if (curthread->t_flag & T_WOULDBLOCK) {
4193                 curthread->t_flag &= ~T_WOULDBLOCK;
4194                 resp->status = NFS3ERR_JUKEBOX;
4195         } else
4196                 resp->status = puterrno3(error);
4197 out1:
4198         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4199             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4200             COMMIT3res *, resp);
4201 
4202         if (vp != NULL)
4203                 VN_RELE(vp);
4204         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4205 }
4206 
4207 void *
4208 rfs3_commit_getfh(COMMIT3args *args)
4209 {
4210 
4211         return (&args->file);
4212 }
4213 
4214 static int
4215 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4216 {
4217 
4218         vap->va_mask = 0;
4219 
4220         if (sap->mode.set_it) {
4221                 vap->va_mode = (mode_t)sap->mode.mode;
4222                 vap->va_mask |= AT_MODE;
4223         }
4224         if (sap->uid.set_it) {
4225                 vap->va_uid = (uid_t)sap->uid.uid;
4226                 vap->va_mask |= AT_UID;
4227         }
4228         if (sap->gid.set_it) {
4229                 vap->va_gid = (gid_t)sap->gid.gid;
4230                 vap->va_mask |= AT_GID;
4231         }
4232         if (sap->size.set_it) {
4233                 if (sap->size.size > (size3)((u_longlong_t)-1))
4234                         return (EINVAL);
4235                 vap->va_size = sap->size.size;
4236                 vap->va_mask |= AT_SIZE;
4237         }
4238         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4239 #ifndef _LP64
4240                 /* check time validity */
4241                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4242                         return (EOVERFLOW);
4243 #endif
4244                 /*
4245                  * nfs protocol defines times as unsigned so don't extend sign,
4246                  * unless sysadmin set nfs_allow_preepoch_time.
4247                  */
4248                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4249                     sap->atime.atime.seconds);
4250                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4251                 vap->va_mask |= AT_ATIME;
4252         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4253                 gethrestime(&vap->va_atime);
4254                 vap->va_mask |= AT_ATIME;
4255         }
4256         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4257 #ifndef _LP64
4258                 /* check time validity */
4259                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4260                         return (EOVERFLOW);
4261 #endif
4262                 /*
4263                  * nfs protocol defines times as unsigned so don't extend sign,
4264                  * unless sysadmin set nfs_allow_preepoch_time.
4265                  */
4266                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4267                     sap->mtime.mtime.seconds);
4268                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4269                 vap->va_mask |= AT_MTIME;
4270         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4271                 gethrestime(&vap->va_mtime);
4272                 vap->va_mask |= AT_MTIME;
4273         }
4274 
4275         return (0);
4276 }
4277 
4278 static const ftype3 vt_to_nf3[] = {
4279         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4280 };
4281 
4282 static int
4283 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4284 {
4285 
4286         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4287         /* Return error if time or size overflow */
4288         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4289                 return (EOVERFLOW);
4290         }
4291         fap->type = vt_to_nf3[vap->va_type];
4292         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4293         fap->nlink = (uint32)vap->va_nlink;
4294         if (vap->va_uid == UID_NOBODY)
4295                 fap->uid = (uid3)NFS_UID_NOBODY;
4296         else
4297                 fap->uid = (uid3)vap->va_uid;
4298         if (vap->va_gid == GID_NOBODY)
4299                 fap->gid = (gid3)NFS_GID_NOBODY;
4300         else
4301                 fap->gid = (gid3)vap->va_gid;
4302         fap->size = (size3)vap->va_size;
4303         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4304         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4305         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4306         fap->fsid = (uint64)vap->va_fsid;
4307         fap->fileid = (fileid3)vap->va_nodeid;
4308         fap->atime.seconds = vap->va_atime.tv_sec;
4309         fap->atime.nseconds = vap->va_atime.tv_nsec;
4310         fap->mtime.seconds = vap->va_mtime.tv_sec;
4311         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4312         fap->ctime.seconds = vap->va_ctime.tv_sec;
4313         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4314         return (0);
4315 }
4316 
4317 static int
4318 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4319 {
4320 
4321         /* Return error if time or size overflow */
4322         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4323             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4324             NFS3_SIZE_OK(vap->va_size))) {
4325                 return (EOVERFLOW);
4326         }
4327         wccap->size = (size3)vap->va_size;
4328         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4329         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4330         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4331         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4332         return (0);
4333 }
4334 
4335 static void
4336 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4337 {
4338 
4339         /* don't return attrs if time overflow */
4340         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4341                 poap->attributes = TRUE;
4342         } else
4343                 poap->attributes = FALSE;
4344 }
4345 
4346 void
4347 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4348 {
4349 
4350         /* don't return attrs if time overflow */
4351         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4352                 poap->attributes = TRUE;
4353         } else
4354                 poap->attributes = FALSE;
4355 }
4356 
4357 static void
4358 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4359 {
4360         vattr_to_pre_op_attr(bvap, &wccp->before);
4361         vattr_to_post_op_attr(avap, &wccp->after);
4362 }
4363 
4364 static int
4365 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4366 {
4367         struct clist    *wcl;
4368         int             wlist_len;
4369         count3          count = rok->count;
4370 
4371         wcl = args->wlist;
4372         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4373                 return (FALSE);
4374 
4375         wcl = args->wlist;
4376         rok->wlist_len = wlist_len;
4377         rok->wlist = wcl;
4378         return (TRUE);
4379 }
4380 
4381 /* ARGSUSED */
4382 static void *
4383 rfs3_zone_init(zoneid_t zoneid)
4384 {
4385         nfs3_srv_t *ns;
4386         struct rfs3_verf_overlay {
4387                 uint_t id; /* a "unique" identifier */
4388                 int ts; /* a unique timestamp */
4389         } *verfp;
4390         timestruc_t now;
4391 
4392         ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4393 
4394         /*
4395          * The following algorithm attempts to find a unique verifier
4396          * to be used as the write verifier returned from the server
4397          * to the client.  It is important that this verifier change
4398          * whenever the server reboots.  Of secondary importance, it
4399          * is important for the verifier to be unique between two
4400          * different servers.
4401          *
4402          * Thus, an attempt is made to use the system hostid and the
4403          * current time in seconds when the nfssrv kernel module is
4404          * loaded.  It is assumed that an NFS server will not be able
4405          * to boot and then to reboot in less than a second.  If the
4406          * hostid has not been set, then the current high resolution
4407          * time is used.  This will ensure different verifiers each
4408          * time the server reboots and minimize the chances that two
4409          * different servers will have the same verifier.
4410          */
4411 
4412 #ifndef lint
4413         /*
4414          * We ASSERT that this constant logic expression is
4415          * always true because in the past, it wasn't.
4416          */
4417         ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4418 #endif
4419 
4420         gethrestime(&now);
4421         verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4422         verfp->ts = (int)now.tv_sec;
4423         verfp->id = zone_get_hostid(NULL);
4424 
4425         if (verfp->id == 0)
4426                 verfp->id = (uint_t)now.tv_nsec;
4427 
4428         return (ns);
4429 }
4430 
4431 /* ARGSUSED */
4432 static void
4433 rfs3_zone_fini(zoneid_t zoneid, void *data)
4434 {
4435         nfs3_srv_t *ns = data;
4436 
4437         kmem_free(ns, sizeof (*ns));
4438 }
4439 
4440 void
4441 rfs3_srvrinit(void)
4442 {
4443         nfs3_srv_caller_id = fs_new_caller_id();
4444         zone_key_create(&rfs3_zone_key, rfs3_zone_init, NULL, rfs3_zone_fini);
4445 }
4446 
4447 void
4448 rfs3_srvrfini(void)
4449 {
4450         /* Nothing to do */
4451 }