1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2018 Nexenta Systems, Inc.
  24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 
  28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /* All Rights Reserved */
  30 
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/buf.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/uio.h>
  40 #include <sys/errno.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/statvfs.h>
  43 #include <sys/kmem.h>
  44 #include <sys/dirent.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/systeminfo.h>
  48 #include <sys/flock.h>
  49 #include <sys/nbmlock.h>
  50 #include <sys/policy.h>
  51 #include <sys/sdt.h>
  52 
  53 #include <rpc/types.h>
  54 #include <rpc/auth.h>
  55 #include <rpc/svc.h>
  56 #include <rpc/rpc_rdma.h>
  57 
  58 #include <nfs/nfs.h>
  59 #include <nfs/export.h>
  60 #include <nfs/nfs_cmd.h>
  61 
  62 #include <sys/strsubr.h>
  63 #include <sys/tsol/label.h>
  64 #include <sys/tsol/tndb.h>
  65 
  66 #include <sys/zone.h>
  67 
  68 #include <inet/ip.h>
  69 #include <inet/ip6.h>
  70 
  71 /*
  72  * Zone global variables of NFSv3 server
  73  */
typedef struct nfs3_srv {
        /*
         * NOTE(review): presumably the NFSv3 write verifier for this
         * server instance; it is not referenced in this part of the
         * file -- confirm against its users.
         */
        writeverf3      write3verf;
} nfs3_srv_t;
  77 
  78 /*
  79  * These are the interface routines for the server side of the
  80  * Network File System.  See the NFS version 3 protocol specification
  81  * for a description of this interface.
  82  */
  83 
/* Forward declarations of helpers private to this file. */
static int      sattr3_to_vattr(sattr3 *, struct vattr *);
static int      vattr_to_fattr3(struct vattr *, fattr3 *);
static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
static int      rdma_setup_read_data3(READ3args *, READ3resok *);

/* Declared here only; the definition is not in this part of the file. */
extern int nfs_loaned_buffers;

/*
 * Caller id that NFSv3 server operations store in the cc_caller_id
 * field of the caller_context_t they hand to VOP calls (see
 * rfs3_setattr()).
 */
u_longlong_t nfs3_srv_caller_id;
  94 
  95 static nfs3_srv_t *
  96 nfs3_get_srv(void)
  97 {
  98         nfs_globals_t *ng = nfs_srv_getzg();
  99         nfs3_srv_t *srv = ng->nfs3_srv;
 100         ASSERT(srv != NULL);
 101         return (srv);
 102 }
 103 
 104 /* ARGSUSED */
 105 void
 106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
 107     struct svc_req *req, cred_t *cr, bool_t ro)
 108 {
 109         int error;
 110         vnode_t *vp;
 111         struct vattr va;
 112 
 113         vp = nfs3_fhtovp(&args->object, exi);
 114 
 115         DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
 116             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 117             GETATTR3args *, args);
 118 
 119         if (vp == NULL) {
 120                 error = ESTALE;
 121                 goto out;
 122         }
 123 
 124         va.va_mask = AT_ALL;
 125         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 126 
 127         if (!error) {
 128                 /* Lie about the object type for a referral */
 129                 if (vn_is_nfs_reparse(vp, cr))
 130                         va.va_type = VLNK;
 131 
 132                 /* overflow error if time or size is out of range */
 133                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 134                 if (error)
 135                         goto out;
 136                 resp->status = NFS3_OK;
 137 
 138                 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 139                     cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 140                     GETATTR3res *, resp);
 141 
 142                 VN_RELE(vp);
 143 
 144                 return;
 145         }
 146 
 147 out:
 148         if (curthread->t_flag & T_WOULDBLOCK) {
 149                 curthread->t_flag &= ~T_WOULDBLOCK;
 150                 resp->status = NFS3ERR_JUKEBOX;
 151         } else
 152                 resp->status = puterrno3(error);
 153 
 154         DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 155             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 156             GETATTR3res *, resp);
 157 
 158         if (vp != NULL)
 159                 VN_RELE(vp);
 160 }
 161 
 162 void *
 163 rfs3_getattr_getfh(GETATTR3args *args)
 164 {
 165 
 166         return (&args->object);
 167 }
 168 
/*
 * NFSv3 SETATTR: apply args->new_attributes to the object named by
 * args->object.
 *
 * On success resp->status is NFS3_OK and resp->resok.obj_wcc carries the
 * attributes sampled before and after the change.  On failure
 * resp->status carries the NFSv3 error and resp->resfail.obj_wcc carries
 * whichever of those two attribute sets had been obtained (either may
 * be NULL).
 *
 * Exit labels: "out" derives resp->status from error, or uses
 * NFS3ERR_JUKEBOX when T_WOULDBLOCK is set on the current thread (the
 * flag is cleared); "out1" is entered directly when resp->status has
 * already been set.  Both fire the done probe, leave the NBMAND critical
 * region if it was entered, and drop the vnode hold.
 */
void
rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        int error;
        vnode_t *vp;
        struct vattr *bvap;     /* &bva once the "before" attrs are valid */
        struct vattr bva;
        struct vattr *avap;     /* &ava once the "after" attrs are valid */
        struct vattr ava;       /* requested attrs, later reused for "after" */
        int flag;
        int in_crit = 0;        /* set once nbl_start_crit() has been called */
        struct flock64 bf;
        caller_context_t ct;

        bvap = NULL;
        avap = NULL;

        vp = nfs3_fhtovp(&args->object, exi);

        DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            SETATTR3args *, args);

        if (vp == NULL) {
                error = ESTALE;
                goto out;
        }

        /* Convert the wire attributes; ava.va_mask says what to change. */
        error = sattr3_to_vattr(&args->new_attributes, &ava);
        if (error)
                goto out;

        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                /*
                 * A client whose label is not admin_low must pass an
                 * equality label check against the object.
                 */
                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
                            exi)) {
                                resp->status = NFS3ERR_ACCES;
                                goto out1;
                        }
                }
        }

        /*
         * We need to specially handle size changes because of
         * possible conflicting NBMAND locks. Get into critical
         * region before VOP_GETATTR, so the size attribute is
         * valid when checking conflicts.
         *
         * Also, check to see if the v4 side of the server has
         * delegated this file.  If so, then we return JUKEBOX to
         * allow the client to retransmit its request.
         */
        if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
                if (nbl_need_check(vp)) {
                        nbl_start_crit(vp, RW_READER);
                        in_crit = 1;
                }
        }

        bva.va_mask = AT_ALL;
        error = rfs4_delegated_getattr(vp, &bva, 0, cr);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto out;

        bvap = &bva;

        if (rdonly(ro, vp)) {
                resp->status = NFS3ERR_ROFS;
                goto out1;
        }

        /*
         * Guarded SETATTR: refuse the change unless the ctime supplied
         * by the client matches the object's current ctime.
         */
        if (args->guard.check &&
            (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
            args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
                resp->status = NFS3ERR_NOT_SYNC;
                goto out1;
        }

        /* ATTR_UTIME is passed only when the client supplied the mtime. */
        if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
                flag = ATTR_UTIME;
        else
                flag = 0;

        /*
         * If the filesystem is exported with nosuid, then mask off
         * the setuid and setgid bits.
         */
        if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
            (exi->exi_export.ex_flags & EX_NOSUID))
                ava.va_mode &= ~(VSUID | VSGID);

        /*
         * Caller context for the VOP calls below.  CC_DONTBLOCK is
         * requested; CC_WOULDBLOCK is checked in cc_flags after the
         * VOP_SPACE/VOP_SETATTR calls.
         */
        ct.cc_sysid = 0;
        ct.cc_pid = 0;
        ct.cc_caller_id = nfs3_srv_caller_id;
        ct.cc_flags = CC_DONTBLOCK;

        /*
         * We need to specially handle size changes because it is
         * possible for the client to create a file with modes
         * which indicate read-only, but with the file opened for
         * writing.  If the client then tries to set the size of
         * the file, then the normal access checking done in
         * VOP_SETATTR would prevent the client from doing so,
         * although it should be legal for it to do so.  To get
         * around this, we do the access checking for ourselves
         * and then use VOP_SPACE which doesn't do the access
         * checking which VOP_SETATTR does. VOP_SPACE can only
         * operate on VREG files, let VOP_SETATTR handle the other
         * extremely rare cases.
         * Also the client should not be allowed to change the
         * size of the file if there is a conflicting non-blocking
         * mandatory lock in the region of the change.
         */
        if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
                if (in_crit) {
                        u_offset_t offset;
                        ssize_t length;

                        /*
                         * The region to check is the span between the
                         * old and the new end of file, whichever way
                         * the size is moving.
                         */
                        if (ava.va_size < bva.va_size) {
                                offset = ava.va_size;
                                length = bva.va_size - ava.va_size;
                        } else {
                                offset = bva.va_size;
                                length = ava.va_size - bva.va_size;
                        }
                        if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
                            NULL)) {
                                error = EACCES;
                                goto out;
                        }
                }

                /*
                 * Only when the caller's uid matches the file's uid and
                 * the size really changes is the size set via VOP_SPACE;
                 * AT_SIZE is then removed so VOP_SETATTR does not
                 * repeat it.
                 */
                if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
                        ava.va_mask &= ~AT_SIZE;
                        bf.l_type = F_WRLCK;
                        bf.l_whence = 0;
                        bf.l_start = (off64_t)ava.va_size;
                        bf.l_len = 0;
                        bf.l_sysid = 0;
                        bf.l_pid = 0;
                        error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
                            (offset_t)ava.va_size, cr, &ct);
                }
        }

        /* Apply whatever attributes are still selected in the mask. */
        if (!error && ava.va_mask)
                error = VOP_SETATTR(vp, &ava, flag, cr, &ct);

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                goto out1;
        }

        /*
         * Sample the "after" attributes (ava is reused for this) even if
         * the change failed, so the failure reply can carry them too.
         */
        ava.va_mask = AT_ALL;
        avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;

        /*
         * Force modified metadata out to stable storage.
         */
        (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

        if (error)
                goto out;

        if (in_crit)
                nbl_end_crit(vp);

        resp->status = NFS3_OK;
        vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);

        DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            SETATTR3res *, resp);

        VN_RELE(vp);

        return;

out:
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
out1:
        DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            SETATTR3res *, resp);

        if (vp != NULL) {
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
        /* Only the local vattr copies are read here, not the vnode. */
        vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
}
 378 
 379 void *
 380 rfs3_setattr_getfh(SETATTR3args *args)
 381 {
 382 
 383         return (&args->object);
 384 }
 385 
 386 /* ARGSUSED */
 387 void
 388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 389     struct svc_req *req, cred_t *cr, bool_t ro)
 390 {
 391         int error;
 392         vnode_t *vp;
 393         vnode_t *dvp;
 394         struct vattr *vap;
 395         struct vattr va;
 396         struct vattr *dvap;
 397         struct vattr dva;
 398         nfs_fh3 *fhp;
 399         struct sec_ol sec = {0, 0};
 400         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 401         struct sockaddr *ca;
 402         char *name = NULL;
 403 
 404         dvap = NULL;
 405 
 406         if (exi != NULL)
 407                 exi_hold(exi);
 408 
 409         /*
 410          * Allow lookups from the root - the default
 411          * location of the public filehandle.
 412          */
 413         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 414                 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
 415                 dvp = ZONE_ROOTVP();
 416                 VN_HOLD(dvp);
 417 
 418                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 419                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 420                     LOOKUP3args *, args);
 421         } else {
 422                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 423 
 424                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 425                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 426                     LOOKUP3args *, args);
 427 
 428                 if (dvp == NULL) {
 429                         error = ESTALE;
 430                         goto out;
 431                 }
 432         }
 433 
 434         dva.va_mask = AT_ALL;
 435         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 436 
 437         if (args->what.name == nfs3nametoolong) {
 438                 resp->status = NFS3ERR_NAMETOOLONG;
 439                 goto out1;
 440         }
 441 
 442         if (args->what.name == NULL || *(args->what.name) == '\0') {
 443                 resp->status = NFS3ERR_ACCES;
 444                 goto out1;
 445         }
 446 
 447         fhp = &args->what.dir;
 448         ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL */
 449         if (strcmp(args->what.name, "..") == 0 &&
 450             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 451                 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 452                     ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
 453                         /*
 454                          * special case for ".." and 'nohide'exported root
 455                          */
 456                         if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 457                                 resp->status = NFS3ERR_ACCES;
 458                                 goto out1;
 459                         }
 460                 } else {
 461                         resp->status = NFS3ERR_NOENT;
 462                         goto out1;
 463                 }
 464         }
 465 
 466         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 467         name = nfscmd_convname(ca, exi, args->what.name,
 468             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 469 
 470         if (name == NULL) {
 471                 resp->status = NFS3ERR_ACCES;
 472                 goto out1;
 473         }
 474 
 475         /*
 476          * If the public filehandle is used then allow
 477          * a multi-component lookup
 478          */
 479         if (PUBLIC_FH3(&args->what.dir)) {
 480                 publicfh_flag = TRUE;
 481 
 482                 exi_rele(exi);
 483                 exi = NULL;
 484 
 485                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 486                     &exi, &sec);
 487 
 488                 /*
 489                  * Since WebNFS may bypass MOUNT, we need to ensure this
 490                  * request didn't come from an unlabeled admin_low client.
 491                  */
 492                 if (is_system_labeled() && error == 0) {
 493                         int             addr_type;
 494                         void            *ipaddr;
 495                         tsol_tpc_t      *tp;
 496 
 497                         if (ca->sa_family == AF_INET) {
 498                                 addr_type = IPV4_VERSION;
 499                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 500                         } else if (ca->sa_family == AF_INET6) {
 501                                 addr_type = IPV6_VERSION;
 502                                 ipaddr = &((struct sockaddr_in6 *)
 503                                     ca)->sin6_addr;
 504                         }
 505                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 506                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 507                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 508                             SUN_CIPSO) {
 509                                 VN_RELE(vp);
 510                                 error = EACCES;
 511                         }
 512                         if (tp != NULL)
 513                                 TPC_RELE(tp);
 514                 }
 515         } else {
 516                 error = VOP_LOOKUP(dvp, name, &vp,
 517                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 518         }
 519 
 520         if (name != args->what.name)
 521                 kmem_free(name, MAXPATHLEN + 1);
 522 
 523         if (error == 0 && vn_ismntpt(vp)) {
 524                 error = rfs_cross_mnt(&vp, &exi);
 525                 if (error)
 526                         VN_RELE(vp);
 527         }
 528 
 529         if (is_system_labeled() && error == 0) {
 530                 bslabel_t *clabel = req->rq_label;
 531 
 532                 ASSERT(clabel != NULL);
 533                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 534                     "got client label from request(1)", struct svc_req *, req);
 535 
 536                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 537                         if (!do_rfs_label_check(clabel, dvp,
 538                             DOMINANCE_CHECK, exi)) {
 539                                 VN_RELE(vp);
 540                                 error = EACCES;
 541                         }
 542                 }
 543         }
 544 
 545         dva.va_mask = AT_ALL;
 546         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 547 
 548         if (error)
 549                 goto out;
 550 
 551         if (sec.sec_flags & SEC_QUERY) {
 552                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 553         } else {
 554                 error = makefh3(&resp->resok.object, vp, exi);
 555                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 556                         auth_weak = TRUE;
 557         }
 558 
 559         if (error) {
 560                 VN_RELE(vp);
 561                 goto out;
 562         }
 563 
 564         va.va_mask = AT_ALL;
 565         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 566 
 567         VN_RELE(vp);
 568 
 569         resp->status = NFS3_OK;
 570         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 571         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 572 
 573         /*
 574          * If it's public fh, no 0x81, and client's flavor is
 575          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 576          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 577          */
 578         if (auth_weak)
 579                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 580 
 581         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 582             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 583             LOOKUP3res *, resp);
 584         VN_RELE(dvp);
 585         exi_rele(exi);
 586 
 587         return;
 588 
 589 out:
 590         if (curthread->t_flag & T_WOULDBLOCK) {
 591                 curthread->t_flag &= ~T_WOULDBLOCK;
 592                 resp->status = NFS3ERR_JUKEBOX;
 593         } else
 594                 resp->status = puterrno3(error);
 595 out1:
 596         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 597             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 598             LOOKUP3res *, resp);
 599 
 600         if (exi != NULL)
 601                 exi_rele(exi);
 602 
 603         if (dvp != NULL)
 604                 VN_RELE(dvp);
 605         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 606 
 607 }
 608 
 609 void *
 610 rfs3_lookup_getfh(LOOKUP3args *args)
 611 {
 612 
 613         return (&args->what.dir);
 614 }
 615 
/*
 * NFSv3 ACCESS: report which of the rights requested in args->access the
 * caller (cr) holds on the object named by args->object.
 *
 * Each requested right is probed with VOP_ACCESS().  A failing probe
 * simply leaves that right out of resp->resok.access; it aborts the whole
 * request only when T_WOULDBLOCK is set on the current thread, in which
 * case the reply is NFS3ERR_JUKEBOX.  On a labeled system the granted
 * rights are further restricted by the relation between the client label
 * and the object label.
 *
 * On success resp->status is NFS3_OK and resp->resok.obj_attributes is
 * filled in; on failure resp->resfail.obj_attributes carries the
 * attributes if they had been read.
 */
/* ARGSUSED */
void
rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        int error;
        vnode_t *vp;
        struct vattr *vap;
        struct vattr va;
        int checkwriteperm;
        boolean_t dominant_label = B_FALSE;
        boolean_t equal_label = B_FALSE;
        /* Assigned, and read, only when is_system_labeled() is true. */
        boolean_t admin_low_client;

        vap = NULL;

        vp = nfs3_fhtovp(&args->object, exi);

        DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            ACCESS3args *, args);

        if (vp == NULL) {
                error = ESTALE;
                goto out;
        }

        /*
         * If the file system is exported read only, it is not appropriate
         * to check write permissions for regular files and directories.
         * Special files are interpreted by the client, so the underlying
         * permissions are sent back to the client for interpretation.
         */
        if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
                checkwriteperm = 0;
        else
                checkwriteperm = 1;

        /*
         * We need the mode so that we can correctly determine access
         * permissions relative to a mandatory lock file.  Access to
         * mandatory lock files is denied on the server, so it might
         * as well be reflected to the client during the open.
         */
        va.va_mask = AT_MODE;
        error = VOP_GETATTR(vp, &va, 0, cr, NULL);
        if (error)
                goto out;

        vap = &va;

        resp->resok.access = 0;

        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                /*
                 * Classify the client: admin_low, label equal to the
                 * object's (which also counts as dominant), or possibly
                 * just dominant.  The read-type rights below require
                 * dominance, the write-type rights require equality.
                 */
                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if ((equal_label = do_rfs_label_check(clabel, vp,
                            EQUALITY_CHECK, exi)) == B_FALSE) {
                                dominant_label = do_rfs_label_check(clabel,
                                    vp, DOMINANCE_CHECK, exi);
                        } else
                                dominant_label = B_TRUE;
                        admin_low_client = B_FALSE;
                } else
                        admin_low_client = B_TRUE;
        }

        /* READ: needs VREAD and no mandatory locking on the file. */
        if (args->access & ACCESS3_READ) {
                error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!MANDLOCK(vp, va.va_mode) &&
                    (!is_system_labeled() || admin_low_client ||
                    dominant_label))
                        resp->resok.access |= ACCESS3_READ;
        }
        /* LOOKUP: directories only, needs VEXEC. */
        if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
                error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!is_system_labeled() || admin_low_client ||
                    dominant_label)
                        resp->resok.access |= ACCESS3_LOOKUP;
        }
        /*
         * MODIFY/EXTEND: one VWRITE probe covers both; whichever of the
         * two bits the client asked about is granted.
         */
        if (checkwriteperm &&
            (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
                error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!MANDLOCK(vp, va.va_mode) &&
                    (!is_system_labeled() || admin_low_client || equal_label)) {
                        resp->resok.access |=
                            (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
                }
        }
        /* DELETE: directories only, needs VWRITE on the directory. */
        if (checkwriteperm &&
            (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
                error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!is_system_labeled() || admin_low_client ||
                    equal_label)
                        resp->resok.access |= ACCESS3_DELETE;
        }
        /* EXECUTE: needs VEXEC and no mandatory locking on the file. */
        if (args->access & ACCESS3_EXECUTE) {
                error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                } else if (!MANDLOCK(vp, va.va_mode) &&
                    (!is_system_labeled() || admin_low_client ||
                    dominant_label))
                        resp->resok.access |= ACCESS3_EXECUTE;
        }

        /* Fetch the full attribute set for the reply (va is reused). */
        va.va_mask = AT_ALL;
        vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;

        /*
         * The reply is NFS3_OK even if the last VOP_ACCESS() probe left
         * error non-zero; denied rights are conveyed through the access
         * mask only.
         */
        resp->status = NFS3_OK;
        vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);

        DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            ACCESS3res *, resp);

        VN_RELE(vp);

        return;

out:
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
        DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            ACCESS3res *, resp);
        if (vp != NULL)
                VN_RELE(vp);
        vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
}
 767 
 768 void *
 769 rfs3_access_getfh(ACCESS3args *args)
 770 {
 771 
 772         return (&args->object);
 773 }
 774 
 775 /* ARGSUSED */
 776 void
 777 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 778     struct svc_req *req, cred_t *cr, bool_t ro)
 779 {
 780         int error;
 781         vnode_t *vp;
 782         struct vattr *vap;
 783         struct vattr va;
 784         struct iovec iov;
 785         struct uio uio;
 786         char *data;
 787         struct sockaddr *ca;
 788         char *name = NULL;
 789         int is_referral = 0;
 790 
 791         vap = NULL;
 792 
 793         vp = nfs3_fhtovp(&args->symlink, exi);
 794 
 795         DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
 796             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 797             READLINK3args *, args);
 798 
 799         if (vp == NULL) {
 800                 error = ESTALE;
 801                 goto out;
 802         }
 803 
 804         va.va_mask = AT_ALL;
 805         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 806         if (error)
 807                 goto out;
 808 
 809         vap = &va;
 810 
 811         /* We lied about the object type for a referral */
 812         if (vn_is_nfs_reparse(vp, cr))
 813                 is_referral = 1;
 814 
 815         if (vp->v_type != VLNK && !is_referral) {
 816                 resp->status = NFS3ERR_INVAL;
 817                 goto out1;
 818         }
 819 
 820         if (MANDLOCK(vp, va.va_mode)) {
 821                 resp->status = NFS3ERR_ACCES;
 822                 goto out1;
 823         }
 824 
 825         if (is_system_labeled()) {
 826                 bslabel_t *clabel = req->rq_label;
 827 
 828                 ASSERT(clabel != NULL);
 829                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 830                     "got client label from request(1)", struct svc_req *, req);
 831 
 832                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 833                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 834                             exi)) {
 835                                 resp->status = NFS3ERR_ACCES;
 836                                 goto out1;
 837                         }
 838                 }
 839         }
 840 
 841         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 842 
 843         if (is_referral) {
 844                 char *s;
 845                 size_t strsz;
 846                 kstat_named_t *stat = exi->exi_ne->ne_globals->svstat[NFS_V3];
 847 
 848                 /* Get an artificial symlink based on a referral */
 849                 s = build_symlink(vp, cr, &strsz);
 850                 stat[NFS_REFERLINKS].value.ui64++;
 851                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 852                     vnode_t *, vp, char *, s);
 853                 if (s == NULL)
 854                         error = EINVAL;
 855                 else {
 856                         error = 0;
 857                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 858                         kmem_free(s, strsz);
 859                 }
 860 
 861         } else {
 862 
 863                 iov.iov_base = data;
 864                 iov.iov_len = MAXPATHLEN;
 865                 uio.uio_iov = &iov;
 866                 uio.uio_iovcnt = 1;
 867                 uio.uio_segflg = UIO_SYSSPACE;
 868                 uio.uio_extflg = UIO_COPY_CACHED;
 869                 uio.uio_loffset = 0;
 870                 uio.uio_resid = MAXPATHLEN;
 871 
 872                 error = VOP_READLINK(vp, &uio, cr, NULL);
 873 
 874                 if (!error)
 875                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 876         }
 877 
 878         va.va_mask = AT_ALL;
 879         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 880 
 881         /* Lie about object type again just to be consistent */
 882         if (is_referral && vap != NULL)
 883                 vap->va_type = VLNK;
 884 
 885 #if 0 /* notyet */
 886         /*
 887          * Don't do this.  It causes local disk writes when just
 888          * reading the file and the overhead is deemed larger
 889          * than the benefit.
 890          */
 891         /*
 892          * Force modified metadata out to stable storage.
 893          */
 894         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 895 #endif
 896 
 897         if (error) {
 898                 kmem_free(data, MAXPATHLEN + 1);
 899                 goto out;
 900         }
 901 
 902         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 903         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 904             MAXPATHLEN + 1);
 905 
 906         if (name == NULL) {
 907                 /*
 908                  * Even though the conversion failed, we return
 909                  * something. We just don't translate it.
 910                  */
 911                 name = data;
 912         }
 913 
 914         resp->status = NFS3_OK;
 915         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 916         resp->resok.data = name;
 917 
 918         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 919             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 920             READLINK3res *, resp);
 921         VN_RELE(vp);
 922 
 923         if (name != data)
 924                 kmem_free(data, MAXPATHLEN + 1);
 925 
 926         return;
 927 
 928 out:
 929         if (curthread->t_flag & T_WOULDBLOCK) {
 930                 curthread->t_flag &= ~T_WOULDBLOCK;
 931                 resp->status = NFS3ERR_JUKEBOX;
 932         } else
 933                 resp->status = puterrno3(error);
 934 out1:
 935         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 936             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 937             READLINK3res *, resp);
 938         if (vp != NULL)
 939                 VN_RELE(vp);
 940         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 941 }
 942 
 943 void *
 944 rfs3_readlink_getfh(READLINK3args *args)
 945 {
 946 
 947         return (&args->symlink);
 948 }
 949 
 950 void
 951 rfs3_readlink_free(READLINK3res *resp)
 952 {
 953 
 954         if (resp->status == NFS3_OK)
 955                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 956 }
 957 
/*
 * Server routine to handle the NFSv3 READ procedure.
 * May handle RDMA data as well as mblks.
 *
 * args - READ3 arguments: file handle, offset, count and, for RDMA
 *        transports, the client's write chunk list (args->wlist).
 *        args->count may be clamped in place to rfs3_tsize(req).
 * resp - reply to fill in; resp->resok on success, resp->resfail
 *        (post-op attributes only) on failure.
 * exi  - export used to translate the file handle and for label checks.
 * req  - RPC request; supplies the client label, the transfer size
 *        limit and the RDMA write chunk.
 * cr   - credentials used for every vnode operation.
 * ro   - not referenced by this routine (hence ARGSUSED).
 *
 * The data is returned in one of three ways:
 *   - loaned (zero-copy) buffers obtained with VOP_REQZCBUF(), used only
 *     when nfs_loaned_buffers is set and RDMA is not in use;
 *   - an RDMA write chunk obtained with rdma_get_wchunk();
 *   - an mblk chain obtained with rfs_read_alloc().
 *
 * Exit paths:
 *   done - success; only releases the vnode and the iovec array, so
 *          callers of this label must already have dropped the rwlock
 *          and left the nbmand critical region.
 *   out  - failure with an errno in 'error'; mapped to an NFSv3 status.
 *   out1 - failure with resp->status already set.
 */
/* ARGSUSED */
void
rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        int error;
        vnode_t *vp;
        struct vattr *vap;
        struct vattr va;
        struct iovec iov, *iovp = NULL;
        int iovcnt;
        struct uio uio;
        u_offset_t offset;
        mblk_t *mp = NULL;
        int in_crit = 0;
        int need_rwunlock = 0;
        caller_context_t ct;
        int rdma_used = 0;
        int loaned_buffers;
        struct uio *uiop;

        /* No attributes to report until the first successful VOP_GETATTR. */
        vap = NULL;

        vp = nfs3_fhtovp(&args->file, exi);

        DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            READ3args *, args);


        /* The file handle could not be translated to a vnode. */
        if (vp == NULL) {
                error = ESTALE;
                goto out;
        }

        /*
         * A write chunk list means the reply data goes out via RDMA.
         * The requested count must fit in the chunk list.
         */
        if (args->wlist) {
                if (args->count > clist_len(args->wlist)) {
                        error = EINVAL;
                        goto out;
                }
                rdma_used = 1;
        }

        /* use loaned buffers for TCP */
        loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

        /*
         * On a labeled system, a client whose label is not admin_low
         * must pass the dominance check against the file.
         */
        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
                            exi)) {
                                resp->status = NFS3ERR_ACCES;
                                goto out1;
                        }
                }
        }

        /*
         * Caller context passed to the vnode operations below.
         * CC_DONTBLOCK is requested; CC_WOULDBLOCK is tested in cc_flags
         * after VOP_RWLOCK and VOP_READ to detect a delegation conflict.
         */
        ct.cc_sysid = 0;
        ct.cc_pid = 0;
        ct.cc_caller_id = nfs3_srv_caller_id;
        ct.cc_flags = CC_DONTBLOCK;

        /*
         * Enter the critical region before calling VOP_RWLOCK
         * to avoid a deadlock with write requests.
         */
        if (nbl_need_check(vp)) {
                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
                    NULL)) {
                        error = EACCES;
                        goto out;
                }
        }

        error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                goto out1;
        }

        /*
         * From here on the failure paths (out/out1) drop the rwlock.
         * NOTE(review): the VOP_RWLOCK return value is not examined any
         * further; the lock is assumed held for every other return.
         */
        need_rwunlock = 1;

        va.va_mask = AT_ALL;
        error = VOP_GETATTR(vp, &va, 0, cr, &ct);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto out;

        vap = &va;

        /* Only regular files can be read. */
        if (vp->v_type != VREG) {
                resp->status = NFS3ERR_INVAL;
                goto out1;
        }

        /*
         * The file's owner skips the access check.  Anyone else needs
         * read permission or, failing that, execute permission
         * (presumably so execute-only files can still be fetched by
         * clients -- confirm).  A pending T_WOULDBLOCK from the first
         * check is reported immediately rather than retried.
         */
        if (crgetuid(cr) != va.va_uid) {
                error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                        error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
                        if (error)
                                goto out;
                }
        }

        if (MANDLOCK(vp, va.va_mode)) {
                resp->status = NFS3ERR_ACCES;
                goto out1;
        }

        /*
         * Reading at or beyond end of file: reply with zero bytes and
         * eof set.  The rwlock and critical region are dropped here
         * because the 'done' path does not do it.
         */
        offset = args->offset;
        if (offset >= va.va_size) {
                VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                resp->status = NFS3_OK;
                vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
                resp->resok.count = 0;
                resp->resok.eof = TRUE;
                resp->resok.data.data_len = 0;
                resp->resok.data.data_val = NULL;
                resp->resok.data.mp = NULL;
                /* RDMA */
                resp->resok.wlist = args->wlist;
                resp->resok.wlist_len = resp->resok.count;
                if (resp->resok.wlist)
                        clist_zero_len(resp->resok.wlist);
                goto done;
        }

        /*
         * Zero-length read inside the file: same empty reply as above,
         * but eof is not set.
         */
        if (args->count == 0) {
                VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                resp->status = NFS3_OK;
                vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
                resp->resok.count = 0;
                resp->resok.eof = FALSE;
                resp->resok.data.data_len = 0;
                resp->resok.data.data_val = NULL;
                resp->resok.data.mp = NULL;
                /* RDMA */
                resp->resok.wlist = args->wlist;
                resp->resok.wlist_len = resp->resok.count;
                if (resp->resok.wlist)
                        clist_zero_len(resp->resok.wlist);
                goto done;
        }

        /*
         * Do not allocate more memory than the maximum allowed
         * transfer size.
         */
        if (args->count > rfs3_tsize(req))
                args->count = rfs3_tsize(req);

        /*
         * Try the zero-copy path first.  If the file system cannot loan
         * buffers, release the xuio and fall back to a copying read.
         */
        if (loaned_buffers) {
                uiop = (uio_t *)rfs_setup_xuio(vp);
                ASSERT(uiop != NULL);
                uiop->uio_segflg = UIO_SYSSPACE;
                uiop->uio_loffset = args->offset;
                uiop->uio_resid = args->count;

                /* Jump to do the read if successful */
                if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
                        /*
                         * Need to hold the vnode until after VOP_RETZCBUF()
                         * is called.
                         */
                        VN_HOLD(vp);
                        goto doio_read;
                }

                DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
                    uiop->uio_loffset, int, uiop->uio_resid);

                uiop->uio_extflg = 0;
                /* failure to setup for zero copy */
                rfs_free_xuio((void *)uiop);
                loaned_buffers = 0;
        }

        /*
         * If returning data via RDMA Write, then grab the chunk list.
         * If we aren't returning READ data w/RDMA_WRITE, then grab
         * a mblk.
         */
        if (rdma_used) {
                (void) rdma_get_wchunk(req, &iov, args->wlist);
                uio.uio_iov = &iov;
                uio.uio_iovcnt = 1;
        } else {
                /*
                 * mp will contain the data to be sent out in the read reply.
                 * For UDP, this will be freed after the reply has been sent
                 * out by the driver.  For TCP, it will be freed after the last
                 * segment associated with the reply has been ACKed by the
                 * client.
                 */
                mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
                uio.uio_iov = iovp;
                uio.uio_iovcnt = iovcnt;
        }

        /* Describe the copying read with the on-stack uio. */
        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_CACHED;
        uio.uio_loffset = args->offset;
        uio.uio_resid = args->count;
        uiop = &uio;

doio_read:
        /* uiop is either the loaned xuio or the on-stack uio set up above. */
        error = VOP_READ(vp, uiop, 0, cr, &ct);

        if (error) {
                /*
                 * NOTE(review): in the loaned-buffer case mp is still NULL
                 * here, and this path does not visibly return the
                 * zero-copy buffers or drop the extra VN_HOLD taken
                 * above -- confirm whether that is handled elsewhere.
                 */
                if (mp)
                        freemsg(mp);
                /* check if a monitor detected a delegation conflict */
                if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                        resp->status = NFS3ERR_JUKEBOX;
                        goto out1;
                }
                goto out;
        }

        /* make mblk using zc buffers */
        if (loaned_buffers) {
                mp = uio_to_mblk(uiop);
                ASSERT(mp != NULL);
        }

        /*
         * Refresh the attributes for the reply.  A failure here does not
         * fail the read; the reply simply carries no attributes and eof
         * is reported as FALSE.
         */
        va.va_mask = AT_ALL;
        error = VOP_GETATTR(vp, &va, 0, cr, &ct);

        if (error)
                vap = NULL;
        else
                vap = &va;

        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);

        if (in_crit)
                nbl_end_crit(vp);

        resp->status = NFS3_OK;
        vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
        /* Bytes actually transferred: requested count minus the residual. */
        resp->resok.count = args->count - uiop->uio_resid;
        /* eof only when the read ended exactly at the current file size. */
        if (!error && offset + resp->resok.count == va.va_size)
                resp->resok.eof = TRUE;
        else
                resp->resok.eof = FALSE;
        resp->resok.data.data_len = resp->resok.count;

        if (mp)
                rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);

        resp->resok.data.mp = mp;
        resp->resok.size = (uint_t)args->count;

        if (rdma_used) {
                resp->resok.data.data_val = (caddr_t)iov.iov_base;
                /*
                 * Only the status is changed on failure; the resok fields
                 * filled in above are left as they are.
                 */
                if (!rdma_setup_read_data3(args, &(resp->resok))) {
                        resp->status = NFS3ERR_INVAL;
                }
        } else {
                resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
                (resp->resok).wlist = NULL;
        }

done:
        DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            READ3res *, resp);

        VN_RELE(vp);

        /* The iovec array from rfs_read_alloc() is no longer needed. */
        if (iovp != NULL)
                kmem_free(iovp, iovcnt * sizeof (struct iovec));

        return;

out:
        /*
         * Map the errno to an NFSv3 status.  A pending T_WOULDBLOCK on
         * the thread takes precedence and is cleared here.
         */
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
out1:
        DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            READ3res *, resp);

        /* Undo, in reverse order, whatever was acquired before the failure. */
        if (vp != NULL) {
                if (need_rwunlock)
                        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
        /* vap is NULL unless the first VOP_GETATTR succeeded. */
        vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);

        if (iovp != NULL)
                kmem_free(iovp, iovcnt * sizeof (struct iovec));
}
1279 
1280 void
1281 rfs3_read_free(READ3res *resp)
1282 {
1283         mblk_t *mp;
1284 
1285         if (resp->status == NFS3_OK) {
1286                 mp = resp->resok.data.mp;
1287                 if (mp != NULL)
1288                         freemsg(mp);
1289         }
1290 }
1291 
1292 void *
1293 rfs3_read_getfh(READ3args *args)
1294 {
1295 
1296         return (&args->file);
1297 }
1298 
/*
 * Number of entries in the on-stack iovec array used by rfs3_write().
 * An incoming mblk chain with more segments than this gets a
 * kmem_alloc'ed iovec array instead.
 */
#define MAX_IOVECS      12

#ifdef DEBUG
/*
 * DEBUG-only counters: how many mblk-based writes fit in the on-stack
 * iovec array (hits) and how many needed an allocated one (misses).
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1305 
/*
 * Server routine to handle the NFSv3 WRITE procedure.
 *
 * args - WRITE3 arguments: file handle, offset, count, stability level
 *        and the data, supplied as an mblk chain (args->mblk), an RDMA
 *        read chunk (args->rlist) or a flat buffer (args->data).
 * resp - reply to fill in; resok (wcc data, count, committed, verf) on
 *        success, resfail (wcc data only) on failure.
 * exi  - export used to translate the file handle and for label checks.
 * req  - RPC request; supplies the client label.
 * cr   - credentials used for every vnode operation; also installed as
 *        the thread's credentials for the duration of VOP_WRITE.
 * ro   - passed to rdonly() for the read-only check.
 *
 * Exit paths:
 *   err  - failure with an errno in 'error'; mapped to an NFSv3 status.
 *   err1 - failure with resp->status already set; fills resfail wcc data.
 *   out  - common exit; drops the rwlock (if held), leaves the nbmand
 *          critical region (if entered) and releases the vnode.
 */
void
rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        nfs3_srv_t *ns;
        int error;
        vnode_t *vp;
        struct vattr *bvap = NULL;
        struct vattr bva;
        struct vattr *avap = NULL;
        struct vattr ava;
        u_offset_t rlimit;
        struct uio uio;
        struct iovec iov[MAX_IOVECS];
        mblk_t *m;
        struct iovec *iovp;
        int iovcnt;
        int ioflag;
        cred_t *savecred;
        int in_crit = 0;
        /* -1 means "no rwlock to drop"; tested on the common exit path. */
        int rwlock_ret = -1;
        caller_context_t ct;

        vp = nfs3_fhtovp(&args->file, exi);

        DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            WRITE3args *, args);

        /*
         * The file handle could not be translated to a vnode.  bvap and
         * avap are still NULL, so the failure reply carries no wcc data.
         */
        if (vp == NULL) {
                error = ESTALE;
                goto err;
        }

        ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
        ns = nfs3_get_srv();

        /*
         * On a labeled system, a client whose label is not admin_low
         * must pass the equality check against the file.
         */
        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
                            exi)) {
                                resp->status = NFS3ERR_ACCES;
                                goto err1;
                        }
                }
        }

        /*
         * Caller context passed to the vnode operations below.
         * CC_DONTBLOCK is requested; CC_WOULDBLOCK is tested in cc_flags
         * after VOP_RWLOCK and VOP_WRITE to detect a delegation conflict.
         */
        ct.cc_sysid = 0;
        ct.cc_pid = 0;
        ct.cc_caller_id = nfs3_srv_caller_id;
        ct.cc_flags = CC_DONTBLOCK;

        /*
         * We have to enter the critical region before calling VOP_RWLOCK
         * to avoid a deadlock with ufs.
         */
        if (nbl_need_check(vp)) {
                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
                    NULL)) {
                        error = EACCES;
                        goto err;
                }
        }

        rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

        /* check if a monitor detected a delegation conflict */
        if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                /* Reset so the exit path does not call VOP_RWUNLOCK. */
                rwlock_ret = -1;
                goto err1;
        }


        bva.va_mask = AT_ALL;
        error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto err;

        /*
         * Until the write has been attempted, the "after" attributes
         * reported on failure are the same as the "before" attributes.
         */
        bvap = &bva;
        avap = bvap;

        /* The stated count must match the length of the data supplied. */
        if (args->count != args->data.data_len) {
                resp->status = NFS3ERR_INVAL;
                goto err1;
        }

        if (rdonly(ro, vp)) {
                resp->status = NFS3ERR_ROFS;
                goto err1;
        }

        /* Only regular files can be written. */
        if (vp->v_type != VREG) {
                resp->status = NFS3ERR_INVAL;
                goto err1;
        }

        /* The file's owner skips the write-access check. */
        if (crgetuid(cr) != bva.va_uid &&
            (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
                goto err;

        if (MANDLOCK(vp, bva.va_mode)) {
                resp->status = NFS3ERR_ACCES;
                goto err1;
        }

        /*
         * Zero-length write: succeed without touching the file.  The
         * requested stability level is echoed back as 'committed'.
         */
        if (args->count == 0) {
                resp->status = NFS3_OK;
                vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
                resp->resok.count = 0;
                resp->resok.committed = args->stable;
                resp->resok.verf = ns->write3verf;
                goto out;
        }

        /*
         * Build the iovec array describing the data to write.  For an
         * mblk chain there is one iovec per mblk; the on-stack array is
         * used when it is big enough, otherwise one is allocated and must
         * be freed (iovp != iov) once the write is done.
         */
        if (args->mblk != NULL) {
                iovcnt = 0;
                for (m = args->mblk; m != NULL; m = m->b_cont)
                        iovcnt++;
                if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
                        rfs3_write_hits++;
#endif
                        iovp = iov;
                } else {
#ifdef DEBUG
                        rfs3_write_misses++;
#endif
                        iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
                }
                mblk_to_iov(args->mblk, iovcnt, iovp);

        } else if (args->rlist != NULL) {
                /* RDMA: the data sits in the read chunk's buffer. */
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
                iovp->iov_len = args->count;
        } else {
                /* Flat buffer supplied with the arguments. */
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = args->data.data_val;
                iovp->iov_len = args->count;
        }

        uio.uio_iov = iovp;
        uio.uio_iovcnt = iovcnt;

        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_DEFAULT;
        uio.uio_loffset = args->offset;
        uio.uio_resid = args->count;
        /*
         * Clamp the transfer so it does not run past the process file
         * size limit.
         * NOTE(review): the subtraction is unsigned, so an offset beyond
         * the limit wraps to a large value and leaves uio_resid
         * unclamped -- presumably VOP_WRITE rejects that case; confirm.
         */
        uio.uio_llimit = curproc->p_fsz_ctl;
        rlimit = uio.uio_llimit - args->offset;
        if (rlimit < (u_offset_t)uio.uio_resid)
                uio.uio_resid = (int)rlimit;

        /* Translate the requested stability level into write flags. */
        if (args->stable == UNSTABLE)
                ioflag = 0;
        else if (args->stable == FILE_SYNC)
                ioflag = FSYNC;
        else if (args->stable == DATA_SYNC)
                ioflag = FDSYNC;
        else {
                /* Unknown stability level: free any allocated iovecs first. */
                if (iovp != iov)
                        kmem_free(iovp, sizeof (*iovp) * iovcnt);
                resp->status = NFS3ERR_INVAL;
                goto err1;
        }

        /*
         * We're changing creds because VM may fault and we need
         * the cred of the current thread to be used if quota
         * checking is enabled.
         */
        savecred = curthread->t_cred;
        curthread->t_cred = cr;
        error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
        curthread->t_cred = savecred;

        if (iovp != iov)
                kmem_free(iovp, sizeof (*iovp) * iovcnt);

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                goto err1;
        }

        /*
         * Fetch the "after" attributes before looking at the write
         * result, so that a failure reply also carries fresh attributes
         * (or none, if they cannot be obtained).
         */
        ava.va_mask = AT_ALL;
        avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;

        if (error)
                goto err;

        /*
         * If we were unable to get the V_WRITELOCK_TRUE, then we
         * may not have accurate after attrs, so check if
         * we have both attributes, they have a non-zero va_seq, and
         * va_seq has changed by exactly one,
         * if not, turn off the before attr.
         */
        if (rwlock_ret != V_WRITELOCK_TRUE) {
                if (bvap == NULL || avap == NULL ||
                    bvap->va_seq == 0 || avap->va_seq == 0 ||
                    avap->va_seq != (bvap->va_seq + 1)) {
                        bvap = NULL;
                }
        }

        resp->status = NFS3_OK;
        vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
        /* Bytes actually written: requested count minus the residual. */
        resp->resok.count = args->count - uio.uio_resid;
        resp->resok.committed = args->stable;
        resp->resok.verf = ns->write3verf;
        goto out;

err:
        /*
         * Map the errno to an NFSv3 status.  A pending T_WOULDBLOCK on
         * the thread takes precedence and is cleared here.
         */
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
err1:
        vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
out:
        DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            WRITE3res *, resp);

        /* Undo, in reverse order, whatever was acquired above. */
        if (vp != NULL) {
                if (rwlock_ret != -1)
                        VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
}
1556 
1557 void *
1558 rfs3_write_getfh(WRITE3args *args)
1559 {
1560 
1561         return (&args->file);
1562 }
1563 
1564 void
1565 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1566     struct svc_req *req, cred_t *cr, bool_t ro)
1567 {
1568         int error;
1569         int in_crit = 0;
1570         vnode_t *vp;
1571         vnode_t *tvp = NULL;
1572         vnode_t *dvp;
1573         struct vattr *vap;
1574         struct vattr va;
1575         struct vattr *dbvap;
1576         struct vattr dbva;
1577         struct vattr *davap;
1578         struct vattr dava;
1579         enum vcexcl excl;
1580         nfstime3 *mtime;
1581         len_t reqsize;
1582         bool_t trunc;
1583         struct sockaddr *ca;
1584         char *name = NULL;
1585 
1586         dbvap = NULL;
1587         davap = NULL;
1588 
1589         dvp = nfs3_fhtovp(&args->where.dir, exi);
1590 
1591         DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1592             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1593             CREATE3args *, args);
1594 
1595         if (dvp == NULL) {
1596                 error = ESTALE;
1597                 goto out;
1598         }
1599 
1600         dbva.va_mask = AT_ALL;
1601         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1602         davap = dbvap;
1603 
1604         if (args->where.name == nfs3nametoolong) {
1605                 resp->status = NFS3ERR_NAMETOOLONG;
1606                 goto out1;
1607         }
1608 
1609         if (args->where.name == NULL || *(args->where.name) == '\0') {
1610                 resp->status = NFS3ERR_ACCES;
1611                 goto out1;
1612         }
1613 
1614         if (rdonly(ro, dvp)) {
1615                 resp->status = NFS3ERR_ROFS;
1616                 goto out1;
1617         }
1618 
1619         if (is_system_labeled()) {
1620                 bslabel_t *clabel = req->rq_label;
1621 
1622                 ASSERT(clabel != NULL);
1623                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1624                     "got client label from request(1)", struct svc_req *, req);
1625 
1626                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1627                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1628                             exi)) {
1629                                 resp->status = NFS3ERR_ACCES;
1630                                 goto out1;
1631                         }
1632                 }
1633         }
1634 
1635         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1636         name = nfscmd_convname(ca, exi, args->where.name,
1637             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1638 
1639         if (name == NULL) {
1640                 /* This is really a Solaris EILSEQ */
1641                 resp->status = NFS3ERR_INVAL;
1642                 goto out1;
1643         }
1644 
1645         if (args->how.mode == EXCLUSIVE) {
1646                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1647                 va.va_type = VREG;
1648                 va.va_mode = (mode_t)0;
1649                 /*
1650                  * Ensure no time overflows and that types match
1651                  */
1652                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1653                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1654                 va.va_mtime.tv_nsec = mtime->nseconds;
1655                 excl = EXCL;
1656         } else {
1657                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1658                     &va);
1659                 if (error)
1660                         goto out;
1661                 va.va_mask |= AT_TYPE;
1662                 va.va_type = VREG;
1663                 if (args->how.mode == GUARDED)
1664                         excl = EXCL;
1665                 else {
1666                         excl = NONEXCL;
1667 
1668                         /*
1669                          * During creation of file in non-exclusive mode
1670                          * if size of file is being set then make sure
1671                          * that if the file already exists that no conflicting
1672                          * non-blocking mandatory locks exists in the region
1673                          * being modified. If there are conflicting locks fail
1674                          * the operation with EACCES.
1675                          */
1676                         if (va.va_mask & AT_SIZE) {
1677                                 struct vattr tva;
1678 
1679                                 /*
1680                                  * Does file already exist?
1681                                  */
1682                                 error = VOP_LOOKUP(dvp, name, &tvp,
1683                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1684 
1685                                 /*
1686                                  * Check to see if the file has been delegated
1687                                  * to a v4 client.  If so, then begin recall of
1688                                  * the delegation and return JUKEBOX to allow
                                 * the client to retransmit its request.
1690                                  */
1691 
1692                                 trunc = va.va_size == 0;
1693                                 if (!error &&
1694                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1695                                         resp->status = NFS3ERR_JUKEBOX;
1696                                         goto out1;
1697                                 }
1698 
1699                                 /*
1700                                  * Check for NBMAND lock conflicts
1701                                  */
1702                                 if (!error && nbl_need_check(tvp)) {
1703                                         u_offset_t offset;
1704                                         ssize_t len;
1705 
1706                                         nbl_start_crit(tvp, RW_READER);
1707                                         in_crit = 1;
1708 
1709                                         tva.va_mask = AT_SIZE;
1710                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1711                                             NULL);
1712                                         /*
1713                                          * Can't check for conflicts, so return
1714                                          * error.
1715                                          */
1716                                         if (error)
1717                                                 goto out;
1718 
1719                                         offset = tva.va_size < va.va_size ?
1720                                             tva.va_size : va.va_size;
1721                                         len = tva.va_size < va.va_size ?
1722                                             va.va_size - tva.va_size :
1723                                             tva.va_size - va.va_size;
1724                                         if (nbl_conflict(tvp, NBL_WRITE,
1725                                             offset, len, 0, NULL)) {
1726                                                 error = EACCES;
1727                                                 goto out;
1728                                         }
1729                                 } else if (tvp) {
1730                                         VN_RELE(tvp);
1731                                         tvp = NULL;
1732                                 }
1733                         }
1734                 }
1735                 if (va.va_mask & AT_SIZE)
1736                         reqsize = va.va_size;
1737         }
1738 
1739         /*
1740          * Must specify the mode.
1741          */
1742         if (!(va.va_mask & AT_MODE)) {
1743                 resp->status = NFS3ERR_INVAL;
1744                 goto out1;
1745         }
1746 
1747         /*
1748          * If the filesystem is exported with nosuid, then mask off
1749          * the setuid and setgid bits.
1750          */
1751         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1752                 va.va_mode &= ~(VSUID | VSGID);
1753 
1754 tryagain:
1755         /*
1756          * The file open mode used is VWRITE.  If the client needs
1757          * some other semantic, then it should do the access checking
1758          * itself.  It would have been nice to have the file open mode
1759          * passed as part of the arguments.
1760          */
1761         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1762             &vp, cr, 0, NULL, NULL);
1763 
1764         dava.va_mask = AT_ALL;
1765         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1766 
1767         if (error) {
1768                 /*
1769                  * If we got something other than file already exists
1770                  * then just return this error.  Otherwise, we got
1771                  * EEXIST.  If we were doing a GUARDED create, then
1772                  * just return this error.  Otherwise, we need to
1773                  * make sure that this wasn't a duplicate of an
1774                  * exclusive create request.
1775                  *
1776                  * The assumption is made that a non-exclusive create
1777                  * request will never return EEXIST.
1778                  */
1779                 if (error != EEXIST || args->how.mode == GUARDED)
1780                         goto out;
1781                 /*
1782                  * Lookup the file so that we can get a vnode for it.
1783                  */
1784                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1785                     NULL, cr, NULL, NULL, NULL);
1786                 if (error) {
1787                         /*
1788                          * We couldn't find the file that we thought that
1789                          * we just created.  So, we'll just try creating
1790                          * it again.
1791                          */
1792                         if (error == ENOENT)
1793                                 goto tryagain;
1794                         goto out;
1795                 }
1796 
1797                 /*
1798                  * If the file is delegated to a v4 client, go ahead
1799                  * and initiate recall, this create is a hint that a
1800                  * conflicting v3 open has occurred.
1801                  */
1802 
1803                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1804                         VN_RELE(vp);
1805                         resp->status = NFS3ERR_JUKEBOX;
1806                         goto out1;
1807                 }
1808 
1809                 va.va_mask = AT_ALL;
1810                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1811 
1812                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1813                 /* % with INT32_MAX to prevent overflows */
1814                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1815                     vap->va_mtime.tv_sec !=
1816                     (mtime->seconds % INT32_MAX) ||
1817                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1818                         VN_RELE(vp);
1819                         error = EEXIST;
1820                         goto out;
1821                 }
1822         } else {
1823 
1824                 if ((args->how.mode == UNCHECKED ||
1825                     args->how.mode == GUARDED) &&
1826                     args->how.createhow3_u.obj_attributes.size.set_it &&
1827                     va.va_size == 0)
1828                         trunc = TRUE;
1829                 else
1830                         trunc = FALSE;
1831 
1832                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1833                         VN_RELE(vp);
1834                         resp->status = NFS3ERR_JUKEBOX;
1835                         goto out1;
1836                 }
1837 
1838                 va.va_mask = AT_ALL;
1839                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1840 
1841                 /*
1842                  * We need to check to make sure that the file got
1843                  * created to the indicated size.  If not, we do a
1844                  * setattr to try to change the size, but we don't
                 * try too hard.  This shouldn't be a problem as most
                 * clients will only specify a size of zero which
1847                  * local file systems handle.  However, even if
1848                  * the client does specify a non-zero size, it can
1849                  * still recover by checking the size of the file
1850                  * after it has created it and then issue a setattr
1851                  * request of its own to set the size of the file.
1852                  */
1853                 if (vap != NULL &&
1854                     (args->how.mode == UNCHECKED ||
1855                     args->how.mode == GUARDED) &&
1856                     args->how.createhow3_u.obj_attributes.size.set_it &&
1857                     vap->va_size != reqsize) {
1858                         va.va_mask = AT_SIZE;
1859                         va.va_size = reqsize;
1860                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1861                         va.va_mask = AT_ALL;
1862                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1863                 }
1864         }
1865 
1866         if (name != args->where.name)
1867                 kmem_free(name, MAXPATHLEN + 1);
1868 
1869         error = makefh3(&resp->resok.obj.handle, vp, exi);
1870         if (error)
1871                 resp->resok.obj.handle_follows = FALSE;
1872         else
1873                 resp->resok.obj.handle_follows = TRUE;
1874 
1875         /*
1876          * Force modified data and metadata out to stable storage.
1877          */
1878         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1879         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1880 
1881         VN_RELE(vp);
1882         if (tvp != NULL) {
1883                 if (in_crit)
1884                         nbl_end_crit(tvp);
1885                 VN_RELE(tvp);
1886         }
1887 
1888         resp->status = NFS3_OK;
1889         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1890         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1891 
1892         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1893             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1894             CREATE3res *, resp);
1895 
1896         VN_RELE(dvp);
1897         return;
1898 
1899 out:
1900         if (curthread->t_flag & T_WOULDBLOCK) {
1901                 curthread->t_flag &= ~T_WOULDBLOCK;
1902                 resp->status = NFS3ERR_JUKEBOX;
1903         } else
1904                 resp->status = puterrno3(error);
1905 out1:
1906         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1907             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1908             CREATE3res *, resp);
1909 
1910         if (name != NULL && name != args->where.name)
1911                 kmem_free(name, MAXPATHLEN + 1);
1912 
1913         if (tvp != NULL) {
1914                 if (in_crit)
1915                         nbl_end_crit(tvp);
1916                 VN_RELE(tvp);
1917         }
1918         if (dvp != NULL)
1919                 VN_RELE(dvp);
1920         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1921 }
1922 
1923 void *
1924 rfs3_create_getfh(CREATE3args *args)
1925 {
1926 
1927         return (&args->where.dir);
1928 }
1929 
1930 void
1931 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1932     struct svc_req *req, cred_t *cr, bool_t ro)
1933 {
1934         int error;
1935         vnode_t *vp = NULL;
1936         vnode_t *dvp;
1937         struct vattr *vap;
1938         struct vattr va;
1939         struct vattr *dbvap;
1940         struct vattr dbva;
1941         struct vattr *davap;
1942         struct vattr dava;
1943         struct sockaddr *ca;
1944         char *name = NULL;
1945 
1946         dbvap = NULL;
1947         davap = NULL;
1948 
1949         dvp = nfs3_fhtovp(&args->where.dir, exi);
1950 
1951         DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1952             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1953             MKDIR3args *, args);
1954 
1955         if (dvp == NULL) {
1956                 error = ESTALE;
1957                 goto out;
1958         }
1959 
1960         dbva.va_mask = AT_ALL;
1961         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1962         davap = dbvap;
1963 
1964         if (args->where.name == nfs3nametoolong) {
1965                 resp->status = NFS3ERR_NAMETOOLONG;
1966                 goto out1;
1967         }
1968 
1969         if (args->where.name == NULL || *(args->where.name) == '\0') {
1970                 resp->status = NFS3ERR_ACCES;
1971                 goto out1;
1972         }
1973 
1974         if (rdonly(ro, dvp)) {
1975                 resp->status = NFS3ERR_ROFS;
1976                 goto out1;
1977         }
1978 
1979         if (is_system_labeled()) {
1980                 bslabel_t *clabel = req->rq_label;
1981 
1982                 ASSERT(clabel != NULL);
1983                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1984                     "got client label from request(1)", struct svc_req *, req);
1985 
1986                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1987                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1988                             exi)) {
1989                                 resp->status = NFS3ERR_ACCES;
1990                                 goto out1;
1991                         }
1992                 }
1993         }
1994 
1995         error = sattr3_to_vattr(&args->attributes, &va);
1996         if (error)
1997                 goto out;
1998 
1999         if (!(va.va_mask & AT_MODE)) {
2000                 resp->status = NFS3ERR_INVAL;
2001                 goto out1;
2002         }
2003 
2004         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2005         name = nfscmd_convname(ca, exi, args->where.name,
2006             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2007 
2008         if (name == NULL) {
2009                 resp->status = NFS3ERR_INVAL;
2010                 goto out1;
2011         }
2012 
2013         va.va_mask |= AT_TYPE;
2014         va.va_type = VDIR;
2015 
2016         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2017 
2018         if (name != args->where.name)
2019                 kmem_free(name, MAXPATHLEN + 1);
2020 
2021         dava.va_mask = AT_ALL;
2022         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2023 
2024         /*
2025          * Force modified data and metadata out to stable storage.
2026          */
2027         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2028 
2029         if (error)
2030                 goto out;
2031 
2032         error = makefh3(&resp->resok.obj.handle, vp, exi);
2033         if (error)
2034                 resp->resok.obj.handle_follows = FALSE;
2035         else
2036                 resp->resok.obj.handle_follows = TRUE;
2037 
2038         va.va_mask = AT_ALL;
2039         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2040 
2041         /*
2042          * Force modified data and metadata out to stable storage.
2043          */
2044         (void) VOP_FSYNC(vp, 0, cr, NULL);
2045 
2046         VN_RELE(vp);
2047 
2048         resp->status = NFS3_OK;
2049         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2050         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2051 
2052         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2053             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2054             MKDIR3res *, resp);
2055         VN_RELE(dvp);
2056 
2057         return;
2058 
2059 out:
2060         if (curthread->t_flag & T_WOULDBLOCK) {
2061                 curthread->t_flag &= ~T_WOULDBLOCK;
2062                 resp->status = NFS3ERR_JUKEBOX;
2063         } else
2064                 resp->status = puterrno3(error);
2065 out1:
2066         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2067             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2068             MKDIR3res *, resp);
2069         if (dvp != NULL)
2070                 VN_RELE(dvp);
2071         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2072 }
2073 
2074 void *
2075 rfs3_mkdir_getfh(MKDIR3args *args)
2076 {
2077 
2078         return (&args->where.dir);
2079 }
2080 
2081 void
2082 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2083     struct svc_req *req, cred_t *cr, bool_t ro)
2084 {
2085         int error;
2086         vnode_t *vp;
2087         vnode_t *dvp;
2088         struct vattr *vap;
2089         struct vattr va;
2090         struct vattr *dbvap;
2091         struct vattr dbva;
2092         struct vattr *davap;
2093         struct vattr dava;
2094         struct sockaddr *ca;
2095         char *name = NULL;
2096         char *symdata = NULL;
2097 
2098         dbvap = NULL;
2099         davap = NULL;
2100 
2101         dvp = nfs3_fhtovp(&args->where.dir, exi);
2102 
2103         DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2104             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2105             SYMLINK3args *, args);
2106 
2107         if (dvp == NULL) {
2108                 error = ESTALE;
2109                 goto err;
2110         }
2111 
2112         dbva.va_mask = AT_ALL;
2113         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2114         davap = dbvap;
2115 
2116         if (args->where.name == nfs3nametoolong) {
2117                 resp->status = NFS3ERR_NAMETOOLONG;
2118                 goto err1;
2119         }
2120 
2121         if (args->where.name == NULL || *(args->where.name) == '\0') {
2122                 resp->status = NFS3ERR_ACCES;
2123                 goto err1;
2124         }
2125 
2126         if (rdonly(ro, dvp)) {
2127                 resp->status = NFS3ERR_ROFS;
2128                 goto err1;
2129         }
2130 
2131         if (is_system_labeled()) {
2132                 bslabel_t *clabel = req->rq_label;
2133 
2134                 ASSERT(clabel != NULL);
2135                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2136                     "got client label from request(1)", struct svc_req *, req);
2137 
2138                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2139                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2140                             exi)) {
2141                                 resp->status = NFS3ERR_ACCES;
2142                                 goto err1;
2143                         }
2144                 }
2145         }
2146 
2147         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2148         if (error)
2149                 goto err;
2150 
2151         if (!(va.va_mask & AT_MODE)) {
2152                 resp->status = NFS3ERR_INVAL;
2153                 goto err1;
2154         }
2155 
2156         if (args->symlink.symlink_data == nfs3nametoolong) {
2157                 resp->status = NFS3ERR_NAMETOOLONG;
2158                 goto err1;
2159         }
2160 
2161         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2162         name = nfscmd_convname(ca, exi, args->where.name,
2163             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2164 
2165         if (name == NULL) {
2166                 /* This is really a Solaris EILSEQ */
2167                 resp->status = NFS3ERR_INVAL;
2168                 goto err1;
2169         }
2170 
2171         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2172             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2173         if (symdata == NULL) {
2174                 /* This is really a Solaris EILSEQ */
2175                 resp->status = NFS3ERR_INVAL;
2176                 goto err1;
2177         }
2178 
2179 
2180         va.va_mask |= AT_TYPE;
2181         va.va_type = VLNK;
2182 
2183         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2184 
2185         dava.va_mask = AT_ALL;
2186         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2187 
2188         if (error)
2189                 goto err;
2190 
2191         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2192             NULL, NULL, NULL);
2193 
2194         /*
2195          * Force modified data and metadata out to stable storage.
2196          */
2197         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2198 
2199 
2200         resp->status = NFS3_OK;
2201         if (error) {
2202                 resp->resok.obj.handle_follows = FALSE;
2203                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2204                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2205                 goto out;
2206         }
2207 
2208         error = makefh3(&resp->resok.obj.handle, vp, exi);
2209         if (error)
2210                 resp->resok.obj.handle_follows = FALSE;
2211         else
2212                 resp->resok.obj.handle_follows = TRUE;
2213 
2214         va.va_mask = AT_ALL;
2215         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2216 
2217         /*
2218          * Force modified data and metadata out to stable storage.
2219          */
2220         (void) VOP_FSYNC(vp, 0, cr, NULL);
2221 
2222         VN_RELE(vp);
2223 
2224         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2225         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2226         goto out;
2227 
2228 err:
2229         if (curthread->t_flag & T_WOULDBLOCK) {
2230                 curthread->t_flag &= ~T_WOULDBLOCK;
2231                 resp->status = NFS3ERR_JUKEBOX;
2232         } else
2233                 resp->status = puterrno3(error);
2234 err1:
2235         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2236 out:
2237         if (name != NULL && name != args->where.name)
2238                 kmem_free(name, MAXPATHLEN + 1);
2239         if (symdata != NULL && symdata != args->symlink.symlink_data)
2240                 kmem_free(symdata, MAXPATHLEN + 1);
2241 
2242         DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2243             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2244             SYMLINK3res *, resp);
2245 
2246         if (dvp != NULL)
2247                 VN_RELE(dvp);
2248 }
2249 
2250 void *
2251 rfs3_symlink_getfh(SYMLINK3args *args)
2252 {
2253 
2254         return (&args->where.dir);
2255 }
2256 
2257 void
2258 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2259     struct svc_req *req, cred_t *cr, bool_t ro)
2260 {
2261         int error;
2262         vnode_t *vp;
2263         vnode_t *realvp;
2264         vnode_t *dvp;
2265         struct vattr *vap;
2266         struct vattr va;
2267         struct vattr *dbvap;
2268         struct vattr dbva;
2269         struct vattr *davap;
2270         struct vattr dava;
2271         int mode;
2272         enum vcexcl excl;
2273         struct sockaddr *ca;
2274         char *name = NULL;
2275 
2276         dbvap = NULL;
2277         davap = NULL;
2278 
2279         dvp = nfs3_fhtovp(&args->where.dir, exi);
2280 
2281         DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2282             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2283             MKNOD3args *, args);
2284 
2285         if (dvp == NULL) {
2286                 error = ESTALE;
2287                 goto out;
2288         }
2289 
2290         dbva.va_mask = AT_ALL;
2291         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2292         davap = dbvap;
2293 
2294         if (args->where.name == nfs3nametoolong) {
2295                 resp->status = NFS3ERR_NAMETOOLONG;
2296                 goto out1;
2297         }
2298 
2299         if (args->where.name == NULL || *(args->where.name) == '\0') {
2300                 resp->status = NFS3ERR_ACCES;
2301                 goto out1;
2302         }
2303 
2304         if (rdonly(ro, dvp)) {
2305                 resp->status = NFS3ERR_ROFS;
2306                 goto out1;
2307         }
2308 
2309         if (is_system_labeled()) {
2310                 bslabel_t *clabel = req->rq_label;
2311 
2312                 ASSERT(clabel != NULL);
2313                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2314                     "got client label from request(1)", struct svc_req *, req);
2315 
2316                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2317                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2318                             exi)) {
2319                                 resp->status = NFS3ERR_ACCES;
2320                                 goto out1;
2321                         }
2322                 }
2323         }
2324 
2325         switch (args->what.type) {
2326         case NF3CHR:
2327         case NF3BLK:
2328                 error = sattr3_to_vattr(
2329                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2330                 if (error)
2331                         goto out;
2332                 if (secpolicy_sys_devices(cr) != 0) {
2333                         resp->status = NFS3ERR_PERM;
2334                         goto out1;
2335                 }
2336                 if (args->what.type == NF3CHR)
2337                         va.va_type = VCHR;
2338                 else
2339                         va.va_type = VBLK;
2340                 va.va_rdev = makedevice(
2341                     args->what.mknoddata3_u.device.spec.specdata1,
2342                     args->what.mknoddata3_u.device.spec.specdata2);
2343                 va.va_mask |= AT_TYPE | AT_RDEV;
2344                 break;
2345         case NF3SOCK:
2346                 error = sattr3_to_vattr(
2347                     &args->what.mknoddata3_u.pipe_attributes, &va);
2348                 if (error)
2349                         goto out;
2350                 va.va_type = VSOCK;
2351                 va.va_mask |= AT_TYPE;
2352                 break;
2353         case NF3FIFO:
2354                 error = sattr3_to_vattr(
2355                     &args->what.mknoddata3_u.pipe_attributes, &va);
2356                 if (error)
2357                         goto out;
2358                 va.va_type = VFIFO;
2359                 va.va_mask |= AT_TYPE;
2360                 break;
2361         default:
2362                 resp->status = NFS3ERR_BADTYPE;
2363                 goto out1;
2364         }
2365 
2366         /*
2367          * Must specify the mode.
2368          */
2369         if (!(va.va_mask & AT_MODE)) {
2370                 resp->status = NFS3ERR_INVAL;
2371                 goto out1;
2372         }
2373 
2374         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2375         name = nfscmd_convname(ca, exi, args->where.name,
2376             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2377 
2378         if (name == NULL) {
2379                 resp->status = NFS3ERR_INVAL;
2380                 goto out1;
2381         }
2382 
2383         excl = EXCL;
2384 
2385         mode = 0;
2386 
2387         error = VOP_CREATE(dvp, name, &va, excl, mode,
2388             &vp, cr, 0, NULL, NULL);
2389 
2390         if (name != args->where.name)
2391                 kmem_free(name, MAXPATHLEN + 1);
2392 
2393         dava.va_mask = AT_ALL;
2394         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2395 
2396         /*
2397          * Force modified data and metadata out to stable storage.
2398          */
2399         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2400 
2401         if (error)
2402                 goto out;
2403 
2404         resp->status = NFS3_OK;
2405 
2406         error = makefh3(&resp->resok.obj.handle, vp, exi);
2407         if (error)
2408                 resp->resok.obj.handle_follows = FALSE;
2409         else
2410                 resp->resok.obj.handle_follows = TRUE;
2411 
2412         va.va_mask = AT_ALL;
2413         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2414 
2415         /*
2416          * Force modified metadata out to stable storage.
2417          *
2418          * if a underlying vp exists, pass it to VOP_FSYNC
2419          */
2420         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2421                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2422         else
2423                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2424 
2425         VN_RELE(vp);
2426 
2427         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2428         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2429         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2430             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2431             MKNOD3res *, resp);
2432         VN_RELE(dvp);
2433         return;
2434 
2435 out:
2436         if (curthread->t_flag & T_WOULDBLOCK) {
2437                 curthread->t_flag &= ~T_WOULDBLOCK;
2438                 resp->status = NFS3ERR_JUKEBOX;
2439         } else
2440                 resp->status = puterrno3(error);
2441 out1:
2442         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2443             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2444             MKNOD3res *, resp);
2445         if (dvp != NULL)
2446                 VN_RELE(dvp);
2447         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2448 }
2449 
2450 void *
2451 rfs3_mknod_getfh(MKNOD3args *args)
2452 {
2453 
2454         return (&args->where.dir);
2455 }
2456 
2457 void
2458 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2459     struct svc_req *req, cred_t *cr, bool_t ro)
2460 {
2461         int error = 0;
2462         vnode_t *vp;
2463         struct vattr *bvap;
2464         struct vattr bva;
2465         struct vattr *avap;
2466         struct vattr ava;
2467         vnode_t *targvp = NULL;
2468         struct sockaddr *ca;
2469         char *name = NULL;
2470 
2471         bvap = NULL;
2472         avap = NULL;
2473 
2474         vp = nfs3_fhtovp(&args->object.dir, exi);
2475 
2476         DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2477             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2478             REMOVE3args *, args);
2479 
2480         if (vp == NULL) {
2481                 error = ESTALE;
2482                 goto err;
2483         }
2484 
2485         bva.va_mask = AT_ALL;
2486         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2487         avap = bvap;
2488 
2489         if (vp->v_type != VDIR) {
2490                 resp->status = NFS3ERR_NOTDIR;
2491                 goto err1;
2492         }
2493 
2494         if (args->object.name == nfs3nametoolong) {
2495                 resp->status = NFS3ERR_NAMETOOLONG;
2496                 goto err1;
2497         }
2498 
2499         if (args->object.name == NULL || *(args->object.name) == '\0') {
2500                 resp->status = NFS3ERR_ACCES;
2501                 goto err1;
2502         }
2503 
2504         if (rdonly(ro, vp)) {
2505                 resp->status = NFS3ERR_ROFS;
2506                 goto err1;
2507         }
2508 
2509         if (is_system_labeled()) {
2510                 bslabel_t *clabel = req->rq_label;
2511 
2512                 ASSERT(clabel != NULL);
2513                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2514                     "got client label from request(1)", struct svc_req *, req);
2515 
2516                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2517                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2518                             exi)) {
2519                                 resp->status = NFS3ERR_ACCES;
2520                                 goto err1;
2521                         }
2522                 }
2523         }
2524 
2525         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2526         name = nfscmd_convname(ca, exi, args->object.name,
2527             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2528 
2529         if (name == NULL) {
2530                 resp->status = NFS3ERR_INVAL;
2531                 goto err1;
2532         }
2533 
2534         /*
2535          * Check for a conflict with a non-blocking mandatory share
2536          * reservation and V4 delegations
2537          */
2538         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2539             NULL, cr, NULL, NULL, NULL);
2540         if (error != 0)
2541                 goto err;
2542 
2543         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2544                 resp->status = NFS3ERR_JUKEBOX;
2545                 goto err1;
2546         }
2547 
2548         if (!nbl_need_check(targvp)) {
2549                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2550         } else {
2551                 nbl_start_crit(targvp, RW_READER);
2552                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2553                         error = EACCES;
2554                 } else {
2555                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2556                 }
2557                 nbl_end_crit(targvp);
2558         }
2559         VN_RELE(targvp);
2560         targvp = NULL;
2561 
2562         ava.va_mask = AT_ALL;
2563         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2564 
2565         /*
2566          * Force modified data and metadata out to stable storage.
2567          */
2568         (void) VOP_FSYNC(vp, 0, cr, NULL);
2569 
2570         if (error)
2571                 goto err;
2572 
2573         resp->status = NFS3_OK;
2574         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2575         goto out;
2576 
2577 err:
2578         if (curthread->t_flag & T_WOULDBLOCK) {
2579                 curthread->t_flag &= ~T_WOULDBLOCK;
2580                 resp->status = NFS3ERR_JUKEBOX;
2581         } else
2582                 resp->status = puterrno3(error);
2583 err1:
2584         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2585 out:
2586         DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2587             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2588             REMOVE3res *, resp);
2589 
2590         if (name != NULL && name != args->object.name)
2591                 kmem_free(name, MAXPATHLEN + 1);
2592 
2593         if (vp != NULL)
2594                 VN_RELE(vp);
2595 }
2596 
2597 void *
2598 rfs3_remove_getfh(REMOVE3args *args)
2599 {
2600 
2601         return (&args->object.dir);
2602 }
2603 
2604 void
2605 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2606     struct svc_req *req, cred_t *cr, bool_t ro)
2607 {
2608         int error;
2609         vnode_t *vp;
2610         struct vattr *bvap;
2611         struct vattr bva;
2612         struct vattr *avap;
2613         struct vattr ava;
2614         struct sockaddr *ca;
2615         char *name = NULL;
2616 
2617         bvap = NULL;
2618         avap = NULL;
2619 
2620         vp = nfs3_fhtovp(&args->object.dir, exi);
2621 
2622         DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2623             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2624             RMDIR3args *, args);
2625 
2626         if (vp == NULL) {
2627                 error = ESTALE;
2628                 goto err;
2629         }
2630 
2631         bva.va_mask = AT_ALL;
2632         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2633         avap = bvap;
2634 
2635         if (vp->v_type != VDIR) {
2636                 resp->status = NFS3ERR_NOTDIR;
2637                 goto err1;
2638         }
2639 
2640         if (args->object.name == nfs3nametoolong) {
2641                 resp->status = NFS3ERR_NAMETOOLONG;
2642                 goto err1;
2643         }
2644 
2645         if (args->object.name == NULL || *(args->object.name) == '\0') {
2646                 resp->status = NFS3ERR_ACCES;
2647                 goto err1;
2648         }
2649 
2650         if (rdonly(ro, vp)) {
2651                 resp->status = NFS3ERR_ROFS;
2652                 goto err1;
2653         }
2654 
2655         if (is_system_labeled()) {
2656                 bslabel_t *clabel = req->rq_label;
2657 
2658                 ASSERT(clabel != NULL);
2659                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2660                     "got client label from request(1)", struct svc_req *, req);
2661 
2662                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2663                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2664                             exi)) {
2665                                 resp->status = NFS3ERR_ACCES;
2666                                 goto err1;
2667                         }
2668                 }
2669         }
2670 
2671         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2672         name = nfscmd_convname(ca, exi, args->object.name,
2673             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2674 
2675         if (name == NULL) {
2676                 resp->status = NFS3ERR_INVAL;
2677                 goto err1;
2678         }
2679 
2680         ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
2681         error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2682 
2683         if (name != args->object.name)
2684                 kmem_free(name, MAXPATHLEN + 1);
2685 
2686         ava.va_mask = AT_ALL;
2687         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2688 
2689         /*
2690          * Force modified data and metadata out to stable storage.
2691          */
2692         (void) VOP_FSYNC(vp, 0, cr, NULL);
2693 
2694         if (error) {
2695                 /*
2696                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2697                  * if the directory is not empty.  A System V NFS server
2698                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2699                  * over the wire.
2700                  */
2701                 if (error == EEXIST)
2702                         error = ENOTEMPTY;
2703                 goto err;
2704         }
2705 
2706         resp->status = NFS3_OK;
2707         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2708         goto out;
2709 
2710 err:
2711         if (curthread->t_flag & T_WOULDBLOCK) {
2712                 curthread->t_flag &= ~T_WOULDBLOCK;
2713                 resp->status = NFS3ERR_JUKEBOX;
2714         } else
2715                 resp->status = puterrno3(error);
2716 err1:
2717         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2718 out:
2719         DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2720             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2721             RMDIR3res *, resp);
2722         if (vp != NULL)
2723                 VN_RELE(vp);
2724 
2725 }
2726 
2727 void *
2728 rfs3_rmdir_getfh(RMDIR3args *args)
2729 {
2730 
2731         return (&args->object.dir);
2732 }
2733 
2734 void
2735 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2736     struct svc_req *req, cred_t *cr, bool_t ro)
2737 {
2738         int error = 0;
2739         vnode_t *fvp;
2740         vnode_t *tvp;
2741         vnode_t *targvp;
2742         struct vattr *fbvap;
2743         struct vattr fbva;
2744         struct vattr *favap;
2745         struct vattr fava;
2746         struct vattr *tbvap;
2747         struct vattr tbva;
2748         struct vattr *tavap;
2749         struct vattr tava;
2750         nfs_fh3 *fh3;
2751         struct exportinfo *to_exi;
2752         vnode_t *srcvp = NULL;
2753         bslabel_t *clabel;
2754         struct sockaddr *ca;
2755         char *name = NULL;
2756         char *toname = NULL;
2757 
2758         fbvap = NULL;
2759         favap = NULL;
2760         tbvap = NULL;
2761         tavap = NULL;
2762         tvp = NULL;
2763 
2764         fvp = nfs3_fhtovp(&args->from.dir, exi);
2765 
2766         DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2767             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2768             RENAME3args *, args);
2769 
2770         if (fvp == NULL) {
2771                 error = ESTALE;
2772                 goto err;
2773         }
2774 
2775         if (is_system_labeled()) {
2776                 clabel = req->rq_label;
2777                 ASSERT(clabel != NULL);
2778                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2779                     "got client label from request(1)", struct svc_req *, req);
2780 
2781                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2782                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2783                             exi)) {
2784                                 resp->status = NFS3ERR_ACCES;
2785                                 goto err1;
2786                         }
2787                 }
2788         }
2789 
2790         fbva.va_mask = AT_ALL;
2791         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2792         favap = fbvap;
2793 
2794         fh3 = &args->to.dir;
2795         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2796         if (to_exi == NULL) {
2797                 resp->status = NFS3ERR_ACCES;
2798                 goto err1;
2799         }
2800         exi_rele(to_exi);
2801 
2802         if (to_exi != exi) {
2803                 resp->status = NFS3ERR_XDEV;
2804                 goto err1;
2805         }
2806 
2807         tvp = nfs3_fhtovp(&args->to.dir, exi);
2808         if (tvp == NULL) {
2809                 error = ESTALE;
2810                 goto err;
2811         }
2812 
2813         tbva.va_mask = AT_ALL;
2814         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2815         tavap = tbvap;
2816 
2817         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2818                 resp->status = NFS3ERR_NOTDIR;
2819                 goto err1;
2820         }
2821 
2822         if (args->from.name == nfs3nametoolong ||
2823             args->to.name == nfs3nametoolong) {
2824                 resp->status = NFS3ERR_NAMETOOLONG;
2825                 goto err1;
2826         }
2827         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2828             args->to.name == NULL || *(args->to.name) == '\0') {
2829                 resp->status = NFS3ERR_ACCES;
2830                 goto err1;
2831         }
2832 
2833         if (rdonly(ro, tvp)) {
2834                 resp->status = NFS3ERR_ROFS;
2835                 goto err1;
2836         }
2837 
2838         if (is_system_labeled()) {
2839                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2840                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2841                             exi)) {
2842                                 resp->status = NFS3ERR_ACCES;
2843                                 goto err1;
2844                         }
2845                 }
2846         }
2847 
2848         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2849         name = nfscmd_convname(ca, exi, args->from.name,
2850             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2851 
2852         if (name == NULL) {
2853                 resp->status = NFS3ERR_INVAL;
2854                 goto err1;
2855         }
2856 
2857         toname = nfscmd_convname(ca, exi, args->to.name,
2858             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2859 
2860         if (toname == NULL) {
2861                 resp->status = NFS3ERR_INVAL;
2862                 goto err1;
2863         }
2864 
2865         /*
2866          * Check for a conflict with a non-blocking mandatory share
2867          * reservation or V4 delegations.
2868          */
2869         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2870             NULL, cr, NULL, NULL, NULL);
2871         if (error != 0)
2872                 goto err;
2873 
2874         /*
2875          * If we rename a delegated file we should recall the
2876          * delegation, since future opens should fail or would
2877          * refer to a new file.
2878          */
2879         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2880                 resp->status = NFS3ERR_JUKEBOX;
2881                 goto err1;
2882         }
2883 
2884         /*
2885          * Check for renaming over a delegated file.  Check nfs4_deleg_policy
2886          * first to avoid VOP_LOOKUP if possible.
2887          */
2888         if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2889             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2890             NULL, NULL, NULL) == 0) {
2891 
2892                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2893                         VN_RELE(targvp);
2894                         resp->status = NFS3ERR_JUKEBOX;
2895                         goto err1;
2896                 }
2897                 VN_RELE(targvp);
2898         }
2899 
2900         if (!nbl_need_check(srcvp)) {
2901                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2902         } else {
2903                 nbl_start_crit(srcvp, RW_READER);
2904                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2905                         error = EACCES;
2906                 else
2907                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2908                 nbl_end_crit(srcvp);
2909         }
2910         if (error == 0)
2911                 vn_renamepath(tvp, srcvp, args->to.name,
2912                     strlen(args->to.name));
2913         VN_RELE(srcvp);
2914         srcvp = NULL;
2915 
2916         fava.va_mask = AT_ALL;
2917         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2918         tava.va_mask = AT_ALL;
2919         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2920 
2921         /*
2922          * Force modified data and metadata out to stable storage.
2923          */
2924         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2925         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2926 
2927         if (error)
2928                 goto err;
2929 
2930         resp->status = NFS3_OK;
2931         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2932         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2933         goto out;
2934 
2935 err:
2936         if (curthread->t_flag & T_WOULDBLOCK) {
2937                 curthread->t_flag &= ~T_WOULDBLOCK;
2938                 resp->status = NFS3ERR_JUKEBOX;
2939         } else {
2940                 resp->status = puterrno3(error);
2941         }
2942 err1:
2943         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2944         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2945 
2946 out:
2947         if (name != NULL && name != args->from.name)
2948                 kmem_free(name, MAXPATHLEN + 1);
2949         if (toname != NULL && toname != args->to.name)
2950                 kmem_free(toname, MAXPATHLEN + 1);
2951 
2952         DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2953             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2954             RENAME3res *, resp);
2955         if (fvp != NULL)
2956                 VN_RELE(fvp);
2957         if (tvp != NULL)
2958                 VN_RELE(tvp);
2959 }
2960 
2961 void *
2962 rfs3_rename_getfh(RENAME3args *args)
2963 {
2964 
2965         return (&args->from.dir);
2966 }
2967 
2968 void
2969 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2970     struct svc_req *req, cred_t *cr, bool_t ro)
2971 {
2972         int error;
2973         vnode_t *vp;
2974         vnode_t *dvp;
2975         struct vattr *vap;
2976         struct vattr va;
2977         struct vattr *bvap;
2978         struct vattr bva;
2979         struct vattr *avap;
2980         struct vattr ava;
2981         nfs_fh3 *fh3;
2982         struct exportinfo *to_exi;
2983         bslabel_t *clabel;
2984         struct sockaddr *ca;
2985         char *name = NULL;
2986 
2987         vap = NULL;
2988         bvap = NULL;
2989         avap = NULL;
2990         dvp = NULL;
2991 
2992         vp = nfs3_fhtovp(&args->file, exi);
2993 
2994         DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2995             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2996             LINK3args *, args);
2997 
2998         if (vp == NULL) {
2999                 error = ESTALE;
3000                 goto out;
3001         }
3002 
3003         va.va_mask = AT_ALL;
3004         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3005 
3006         fh3 = &args->link.dir;
3007         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3008         if (to_exi == NULL) {
3009                 resp->status = NFS3ERR_ACCES;
3010                 goto out1;
3011         }
3012         exi_rele(to_exi);
3013 
3014         if (to_exi != exi) {
3015                 resp->status = NFS3ERR_XDEV;
3016                 goto out1;
3017         }
3018 
3019         if (is_system_labeled()) {
3020                 clabel = req->rq_label;
3021 
3022                 ASSERT(clabel != NULL);
3023                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3024                     "got client label from request(1)", struct svc_req *, req);
3025 
3026                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3027                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3028                             exi)) {
3029                                 resp->status = NFS3ERR_ACCES;
3030                                 goto out1;
3031                         }
3032                 }
3033         }
3034 
3035         dvp = nfs3_fhtovp(&args->link.dir, exi);
3036         if (dvp == NULL) {
3037                 error = ESTALE;
3038                 goto out;
3039         }
3040 
3041         bva.va_mask = AT_ALL;
3042         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3043 
3044         if (dvp->v_type != VDIR) {
3045                 resp->status = NFS3ERR_NOTDIR;
3046                 goto out1;
3047         }
3048 
3049         if (args->link.name == nfs3nametoolong) {
3050                 resp->status = NFS3ERR_NAMETOOLONG;
3051                 goto out1;
3052         }
3053 
3054         if (args->link.name == NULL || *(args->link.name) == '\0') {
3055                 resp->status = NFS3ERR_ACCES;
3056                 goto out1;
3057         }
3058 
3059         if (rdonly(ro, dvp)) {
3060                 resp->status = NFS3ERR_ROFS;
3061                 goto out1;
3062         }
3063 
3064         if (is_system_labeled()) {
3065                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3066                     "got client label from request(1)", struct svc_req *, req);
3067 
3068                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3069                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3070                             exi)) {
3071                                 resp->status = NFS3ERR_ACCES;
3072                                 goto out1;
3073                         }
3074                 }
3075         }
3076 
3077         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3078         name = nfscmd_convname(ca, exi, args->link.name,
3079             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3080 
3081         if (name == NULL) {
3082                 resp->status = NFS3ERR_SERVERFAULT;
3083                 goto out1;
3084         }
3085 
3086         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3087 
3088         va.va_mask = AT_ALL;
3089         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3090         ava.va_mask = AT_ALL;
3091         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3092 
3093         /*
3094          * Force modified data and metadata out to stable storage.
3095          */
3096         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3097         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3098 
3099         if (error)
3100                 goto out;
3101 
3102         VN_RELE(dvp);
3103 
3104         resp->status = NFS3_OK;
3105         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3106         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3107 
3108         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3109             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3110             LINK3res *, resp);
3111 
3112         VN_RELE(vp);
3113 
3114         return;
3115 
3116 out:
3117         if (curthread->t_flag & T_WOULDBLOCK) {
3118                 curthread->t_flag &= ~T_WOULDBLOCK;
3119                 resp->status = NFS3ERR_JUKEBOX;
3120         } else
3121                 resp->status = puterrno3(error);
3122 out1:
3123         if (name != NULL && name != args->link.name)
3124                 kmem_free(name, MAXPATHLEN + 1);
3125 
3126         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3127             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3128             LINK3res *, resp);
3129 
3130         if (vp != NULL)
3131                 VN_RELE(vp);
3132         if (dvp != NULL)
3133                 VN_RELE(dvp);
3134         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3135         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3136 }
3137 
3138 void *
3139 rfs3_link_getfh(LINK3args *args)
3140 {
3141 
3142         return (&args->file);
3143 }
3144 
/*
 * Size in bytes of a READDIR response that carries attribute
 * information plus exactly one directory entry whose name is `length'
 * bytes long.  If the size requested by the client is larger than
 * this, we are guaranteed to be able to return at least one directory
 * entry if one exists, so NFS3ERR_TOOSMALL need not be considered.
 * If it is not larger than this, the caller has to check whether that
 * error must be returned.
 *
 * The fixed part, counted in XDR units, is made up of:
 *
 *      status                  1
 *      attribute flag          1
 *      cookie verifier         2
 *      attributes              NFS3_SIZEOF_FATTR3
 *      boolean                 1
 *      file id                 2
 *      name length             1
 *      cookie                  2
 *      end of list             1
 *      end of file             1
 *
 * Each unit is BYTES_PER_XDR_UNIT bytes.  To this is added the length
 * of the name rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (BYTES_PER_XDR_UNIT * \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) + \
            roundup((length), BYTES_PER_XDR_UNIT))
3173 
3174 /* ARGSUSED */
3175 void
3176 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3177     struct svc_req *req, cred_t *cr, bool_t ro)
3178 {
3179         int error;
3180         vnode_t *vp;
3181         struct vattr *vap;
3182         struct vattr va;
3183         struct iovec iov;
3184         struct uio uio;
3185         char *data;
3186         int iseof;
3187         int bufsize;
3188         int namlen;
3189         uint_t count;
3190         struct sockaddr *ca;
3191 
3192         vap = NULL;
3193 
3194         vp = nfs3_fhtovp(&args->dir, exi);
3195 
3196         DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3197             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3198             READDIR3args *, args);
3199 
3200         if (vp == NULL) {
3201                 error = ESTALE;
3202                 goto out;
3203         }
3204 
3205         if (is_system_labeled()) {
3206                 bslabel_t *clabel = req->rq_label;
3207 
3208                 ASSERT(clabel != NULL);
3209                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3210                     "got client label from request(1)", struct svc_req *, req);
3211 
3212                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3213                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3214                             exi)) {
3215                                 resp->status = NFS3ERR_ACCES;
3216                                 goto out1;
3217                         }
3218                 }
3219         }
3220 
3221         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3222 
3223         va.va_mask = AT_ALL;
3224         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3225 
3226         if (vp->v_type != VDIR) {
3227                 resp->status = NFS3ERR_NOTDIR;
3228                 goto out1;
3229         }
3230 
3231         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3232         if (error)
3233                 goto out;
3234 
3235         /*
3236          * Now don't allow arbitrary count to alloc;
3237          * allow the maximum not to exceed rfs3_tsize()
3238          */
3239         if (args->count > rfs3_tsize(req))
3240                 args->count = rfs3_tsize(req);
3241 
3242         /*
3243          * Make sure that there is room to read at least one entry
3244          * if any are available.
3245          */
3246         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3247                 count = DIRENT64_RECLEN(MAXNAMELEN);
3248         else
3249                 count = args->count;
3250 
3251         data = kmem_alloc(count, KM_SLEEP);
3252 
3253         iov.iov_base = data;
3254         iov.iov_len = count;
3255         uio.uio_iov = &iov;
3256         uio.uio_iovcnt = 1;
3257         uio.uio_segflg = UIO_SYSSPACE;
3258         uio.uio_extflg = UIO_COPY_CACHED;
3259         uio.uio_loffset = (offset_t)args->cookie;
3260         uio.uio_resid = count;
3261 
3262         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3263 
3264         va.va_mask = AT_ALL;
3265         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3266 
3267         if (error) {
3268                 kmem_free(data, count);
3269                 goto out;
3270         }
3271 
3272         /*
3273          * If the count was not large enough to be able to guarantee
3274          * to be able to return at least one entry, then need to
3275          * check to see if NFS3ERR_TOOSMALL should be returned.
3276          */
3277         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3278                 /*
3279                  * bufsize is used to keep track of the size of the response.
3280                  * It is primed with:
3281                  *      1 for the status +
3282                  *      1 for the dir_attributes.attributes boolean +
3283                  *      2 for the cookie verifier
3284                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3285                  * to bytes.  If there are directory attributes to be
3286                  * returned, then:
3287                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3288                  * time BYTES_PER_XDR_UNIT is added to account for them.
3289                  */
3290                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3291                 if (vap != NULL)
3292                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3293                 /*
3294                  * An entry is composed of:
3295                  *      1 for the true/false list indicator +
3296                  *      2 for the fileid +
3297                  *      1 for the length of the name +
3298                  *      2 for the cookie +
3299                  * all times BYTES_PER_XDR_UNIT to convert from
3300                  * XDR units to bytes, plus the length of the name
3301                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3302                  */
3303                 if (count != uio.uio_resid) {
3304                         namlen = strlen(((struct dirent64 *)data)->d_name);
3305                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3306                             roundup(namlen, BYTES_PER_XDR_UNIT);
3307                 }
3308                 /*
3309                  * We need to check to see if the number of bytes left
3310                  * to go into the buffer will actually fit into the
3311                  * buffer.  This is calculated as the size of this
3312                  * entry plus:
3313                  *      1 for the true/false list indicator +
3314                  *      1 for the eof indicator
3315                  * times BYTES_PER_XDR_UNIT to convert from from
3316                  * XDR units to bytes.
3317                  */
3318                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3319                 if (bufsize > args->count) {
3320                         kmem_free(data, count);
3321                         resp->status = NFS3ERR_TOOSMALL;
3322                         goto out1;
3323                 }
3324         }
3325 
3326         /*
3327          * Have a valid readir buffer for the native character
3328          * set. Need to check if a conversion is necessary and
3329          * potentially rewrite the whole buffer. Note that if the
3330          * conversion expands names enough, the structure may not
3331          * fit. In this case, we need to drop entries until if fits
3332          * and patch the counts in order that the next readdir will
3333          * get the correct entries.
3334          */
3335         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3336         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3337 
3338 
3339         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3340 
3341 #if 0 /* notyet */
3342         /*
3343          * Don't do this.  It causes local disk writes when just
3344          * reading the file and the overhead is deemed larger
3345          * than the benefit.
3346          */
3347         /*
3348          * Force modified metadata out to stable storage.
3349          */
3350         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3351 #endif
3352 
3353         resp->status = NFS3_OK;
3354         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3355         resp->resok.cookieverf = 0;
3356         resp->resok.reply.entries = (entry3 *)data;
3357         resp->resok.reply.eof = iseof;
3358         resp->resok.size = count - uio.uio_resid;
3359         resp->resok.count = args->count;
3360         resp->resok.freecount = count;
3361 
3362         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3363             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3364             READDIR3res *, resp);
3365 
3366         VN_RELE(vp);
3367 
3368         return;
3369 
3370 out:
3371         if (curthread->t_flag & T_WOULDBLOCK) {
3372                 curthread->t_flag &= ~T_WOULDBLOCK;
3373                 resp->status = NFS3ERR_JUKEBOX;
3374         } else
3375                 resp->status = puterrno3(error);
3376 out1:
3377         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3378 
3379         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3380             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3381             READDIR3res *, resp);
3382 
3383         if (vp != NULL) {
3384                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3385                 VN_RELE(vp);
3386         }
3387         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3388 }
3389 
3390 void *
3391 rfs3_readdir_getfh(READDIR3args *args)
3392 {
3393 
3394         return (&args->dir);
3395 }
3396 
3397 void
3398 rfs3_readdir_free(READDIR3res *resp)
3399 {
3400 
3401         if (resp->status == NFS3_OK)
3402                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3403 }
3404 
/*
 * Step from one dirent64 record to the record that follows it: the
 * next record begins d_reclen bytes past the start of the current one.
 * Any earlier definition of nextdp is discarded first.
 */
#undef nextdp
#define nextdp(dp)      \
        ((struct dirent64 *)(&((char *)(dp))[(dp)->d_reclen]))
3409 
/*
 * This macro computes the size in bytes of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 * BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The argument is parenthesized so that an expression may be passed
 * safely, matching NFS3_READDIR_MIN_COUNT().
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + \
        NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3433 
/*
 * Initial value of the rd_unit local in rfs3_readdirplus().
 * NOTE(review): presumably the size of each directory read issued while
 * servicing READDIRPLUS -- confirm against the rest of that function.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3435 
3436 /* ARGSUSED */
3437 void
3438 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3439     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3440 {
3441         int error;
3442         vnode_t *vp;
3443         struct vattr *vap;
3444         struct vattr va;
3445         struct iovec iov;
3446         struct uio uio;
3447         char *data;
3448         int iseof;
3449         struct dirent64 *dp;
3450         vnode_t *nvp;
3451         struct vattr *nvap;
3452         struct vattr nva;
3453         entryplus3_info *infop = NULL;
3454         int size = 0;
3455         int nents = 0;
3456         int bufsize = 0;
3457         int entrysize = 0;
3458         int tofit = 0;
3459         int rd_unit = rfs3_readdir_unit;
3460         int prev_len;
3461         int space_left;
3462         int i;
3463         uint_t *namlen = NULL;
3464         char *ndata = NULL;
3465         struct sockaddr *ca;
3466         size_t ret;
3467 
3468         vap = NULL;
3469 
3470         vp = nfs3_fhtovp(&args->dir, exi);
3471 
3472         DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3473             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3474             READDIRPLUS3args *, args);
3475 
3476         if (vp == NULL) {
3477                 error = ESTALE;
3478                 goto out;
3479         }
3480 
3481         if (is_system_labeled()) {
3482                 bslabel_t *clabel = req->rq_label;
3483 
3484                 ASSERT(clabel != NULL);
3485                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3486                     char *, "got client label from request(1)",
3487                     struct svc_req *, req);
3488 
3489                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3490                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3491                             exi)) {
3492                                 resp->status = NFS3ERR_ACCES;
3493                                 goto out1;
3494                         }
3495                 }
3496         }
3497 
3498         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3499 
3500         va.va_mask = AT_ALL;
3501         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3502 
3503         if (vp->v_type != VDIR) {
3504                 error = ENOTDIR;
3505                 goto out;
3506         }
3507 
3508         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3509         if (error)
3510                 goto out;
3511 
3512         /*
3513          * Don't allow arbitrary counts for allocation
3514          */
3515         if (args->maxcount > rfs3_tsize(req))
3516                 args->maxcount = rfs3_tsize(req);
3517 
3518         /*
3519          * Make sure that there is room to read at least one entry
3520          * if any are available
3521          */
3522         args->dircount = MIN(args->dircount, args->maxcount);
3523 
3524         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3525                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3526 
3527         /*
3528          * This allocation relies on a minimum directory entry
3529          * being roughly 24 bytes.  Therefore, the namlen array
3530          * will have enough space based on the maximum number of
3531          * entries to read.
3532          */
3533         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3534 
3535         space_left = args->dircount;
3536         data = kmem_alloc(args->dircount, KM_SLEEP);
3537         dp = (struct dirent64 *)data;
3538         uio.uio_iov = &iov;
3539         uio.uio_iovcnt = 1;
3540         uio.uio_segflg = UIO_SYSSPACE;
3541         uio.uio_extflg = UIO_COPY_CACHED;
3542         uio.uio_loffset = (offset_t)args->cookie;
3543 
3544         /*
3545          * bufsize is used to keep track of the size of the response as we
3546          * get post op attributes and filehandles for each entry.  This is
3547          * an optimization as the server may have read more entries than will
3548          * fit in the buffer specified by maxcount.  We stop calculating
3549          * post op attributes and filehandles once we have exceeded maxcount.
3550          * This will minimize the effect of truncation.
3551          *
3552          * It is primed with:
3553          *      1 for the status +
3554          *      1 for the dir_attributes.attributes boolean +
3555          *      2 for the cookie verifier
3556          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3557          * to bytes.  If there are directory attributes to be
3558          * returned, then:
3559          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3560          * time BYTES_PER_XDR_UNIT is added to account for them.
3561          */
3562         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3563         if (vap != NULL)
3564                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3565 
3566 getmoredents:
3567         /*
3568          * Here we make a check so that our read unit is not larger than
3569          * the space left in the buffer.
3570          */
3571         rd_unit = MIN(rd_unit, space_left);
3572         iov.iov_base = (char *)dp;
3573         iov.iov_len = rd_unit;
3574         uio.uio_resid = rd_unit;
3575         prev_len = rd_unit;
3576 
3577         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3578 
3579         if (error) {
3580                 kmem_free(data, args->dircount);
3581                 goto out;
3582         }
3583 
3584         if (uio.uio_resid == prev_len && !iseof) {
3585                 if (nents == 0) {
3586                         kmem_free(data, args->dircount);
3587                         resp->status = NFS3ERR_TOOSMALL;
3588                         goto out1;
3589                 }
3590 
3591                 /*
3592                  * We could not get any more entries, so get the attributes
3593                  * and filehandle for the entries already obtained.
3594                  */
3595                 goto good;
3596         }
3597 
3598         /*
3599          * We estimate the size of the response by assuming the
3600          * entry exists and attributes and filehandle are also valid
3601          */
3602         for (size = prev_len - uio.uio_resid;
3603             size > 0;
3604             size -= dp->d_reclen, dp = nextdp(dp)) {
3605 
3606                 if (dp->d_ino == 0) {
3607                         nents++;
3608                         continue;
3609                 }
3610 
3611                 namlen[nents] = strlen(dp->d_name);
3612                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3613 
3614                 /*
3615                  * We need to check to see if the number of bytes left
3616                  * to go into the buffer will actually fit into the
3617                  * buffer.  This is calculated as the size of this
3618                  * entry plus:
3619                  *      1 for the true/false list indicator +
3620                  *      1 for the eof indicator
3621                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3622                  * to bytes.
3623                  *
3624                  * Also check the dircount limit against the first entry read
3625                  *
3626                  */
3627                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3628                 if (bufsize + tofit > args->maxcount) {
3629                         /*
3630                          * We make a check here to see if this was the
3631                          * first entry being measured.  If so, then maxcount
3632                          * was too small to begin with and so we need to
3633                          * return with NFS3ERR_TOOSMALL.
3634                          */
3635                         if (nents == 0) {
3636                                 kmem_free(data, args->dircount);
3637                                 resp->status = NFS3ERR_TOOSMALL;
3638                                 goto out1;
3639                         }
3640                         iseof = FALSE;
3641                         goto good;
3642                 }
3643                 bufsize += entrysize;
3644                 nents++;
3645         }
3646 
3647         /*
3648          * If there is enough room to fit at least 1 more entry including
3649          * post op attributes and filehandle in the buffer AND that we haven't
3650          * exceeded dircount then go back and get some more.
3651          */
3652         if (!iseof &&
3653             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3654                 space_left -= (prev_len - uio.uio_resid);
3655                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3656                         goto getmoredents;
3657 
3658                 /* else, fall through */
3659         }
3660 good:
3661         va.va_mask = AT_ALL;
3662         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3663 
3664         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3665 
3666         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3667         resp->resok.infop = infop;
3668 
3669         dp = (struct dirent64 *)data;
3670         for (i = 0; i < nents; i++) {
3671 
3672                 if (dp->d_ino == 0) {
3673                         infop[i].attr.attributes = FALSE;
3674                         infop[i].fh.handle_follows = FALSE;
3675                         dp = nextdp(dp);
3676                         continue;
3677                 }
3678 
3679                 infop[i].namelen = namlen[i];
3680 
3681                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3682                     NULL, NULL, NULL);
3683                 if (error) {
3684                         infop[i].attr.attributes = FALSE;
3685                         infop[i].fh.handle_follows = FALSE;
3686                         dp = nextdp(dp);
3687                         continue;
3688                 }
3689 
3690                 nva.va_mask = AT_ALL;
3691                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3692 
3693                 /* Lie about the object type for a referral */
3694                 if (vn_is_nfs_reparse(nvp, cr))
3695                         nvap->va_type = VLNK;
3696 
3697                 if (vn_ismntpt(nvp)) {
3698                         infop[i].attr.attributes = FALSE;
3699                         infop[i].fh.handle_follows = FALSE;
3700                 } else {
3701                         vattr_to_post_op_attr(nvap, &infop[i].attr);
3702 
3703                         error = makefh3(&infop[i].fh.handle, nvp, exi);
3704                         if (!error)
3705                                 infop[i].fh.handle_follows = TRUE;
3706                         else
3707                                 infop[i].fh.handle_follows = FALSE;
3708                 }
3709 
3710                 VN_RELE(nvp);
3711                 dp = nextdp(dp);
3712         }
3713 
3714         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3715         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3716         if (ndata == NULL)
3717                 ndata = data;
3718 
3719         if (ret > 0) {
3720                 /*
3721                  * We had to drop one or more entries in order to fit
3722                  * during the character conversion.  We need to patch
3723                  * up the size and eof info.
3724                  */
3725                 if (iseof)
3726                         iseof = FALSE;
3727 
3728                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3729                     nents, ret);
3730         }
3731 
3732 
3733 #if 0 /* notyet */
3734         /*
3735          * Don't do this.  It causes local disk writes when just
3736          * reading the file and the overhead is deemed larger
3737          * than the benefit.
3738          */
3739         /*
3740          * Force modified metadata out to stable storage.
3741          */
3742         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3743 #endif
3744 
3745         kmem_free(namlen, args->dircount);
3746 
3747         resp->status = NFS3_OK;
3748         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3749         resp->resok.cookieverf = 0;
3750         resp->resok.reply.entries = (entryplus3 *)ndata;
3751         resp->resok.reply.eof = iseof;
3752         resp->resok.size = nents;
3753         resp->resok.count = args->dircount - ret;
3754         resp->resok.maxcount = args->maxcount;
3755 
3756         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3757             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3758             READDIRPLUS3res *, resp);
3759 
3760         VN_RELE(vp);
3761 
3762         return;
3763 
3764 out:
3765         if (curthread->t_flag & T_WOULDBLOCK) {
3766                 curthread->t_flag &= ~T_WOULDBLOCK;
3767                 resp->status = NFS3ERR_JUKEBOX;
3768         } else {
3769                 resp->status = puterrno3(error);
3770         }
3771 out1:
3772         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3773 
3774         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3775             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3776             READDIRPLUS3res *, resp);
3777 
3778         if (vp != NULL) {
3779                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3780                 VN_RELE(vp);
3781         }
3782 
3783         if (namlen != NULL)
3784                 kmem_free(namlen, args->dircount);
3785 
3786         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3787 }
3788 
3789 void *
3790 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3791 {
3792 
3793         return (&args->dir);
3794 }
3795 
3796 void
3797 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3798 {
3799 
3800         if (resp->status == NFS3_OK) {
3801                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3802                 kmem_free(resp->resok.infop,
3803                     resp->resok.size * sizeof (struct entryplus3_info));
3804         }
3805 }
3806 
3807 /* ARGSUSED */
3808 void
3809 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3810     struct svc_req *req, cred_t *cr, bool_t ro)
3811 {
3812         int error;
3813         vnode_t *vp;
3814         struct vattr *vap;
3815         struct vattr va;
3816         struct statvfs64 sb;
3817 
3818         vap = NULL;
3819 
3820         vp = nfs3_fhtovp(&args->fsroot, exi);
3821 
3822         DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3823             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3824             FSSTAT3args *, args);
3825 
3826         if (vp == NULL) {
3827                 error = ESTALE;
3828                 goto out;
3829         }
3830 
3831         if (is_system_labeled()) {
3832                 bslabel_t *clabel = req->rq_label;
3833 
3834                 ASSERT(clabel != NULL);
3835                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3836                     "got client label from request(1)", struct svc_req *, req);
3837 
3838                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3839                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3840                             exi)) {
3841                                 resp->status = NFS3ERR_ACCES;
3842                                 goto out1;
3843                         }
3844                 }
3845         }
3846 
3847         error = VFS_STATVFS(vp->v_vfsp, &sb);
3848 
3849         va.va_mask = AT_ALL;
3850         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3851 
3852         if (error)
3853                 goto out;
3854 
3855         resp->status = NFS3_OK;
3856         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3857         if (sb.f_blocks != (fsblkcnt64_t)-1)
3858                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3859         else
3860                 resp->resok.tbytes = (size3)sb.f_blocks;
3861         if (sb.f_bfree != (fsblkcnt64_t)-1)
3862                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3863         else
3864                 resp->resok.fbytes = (size3)sb.f_bfree;
3865         if (sb.f_bavail != (fsblkcnt64_t)-1)
3866                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3867         else
3868                 resp->resok.abytes = (size3)sb.f_bavail;
3869         resp->resok.tfiles = (size3)sb.f_files;
3870         resp->resok.ffiles = (size3)sb.f_ffree;
3871         resp->resok.afiles = (size3)sb.f_favail;
3872         resp->resok.invarsec = 0;
3873 
3874         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3875             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3876             FSSTAT3res *, resp);
3877         VN_RELE(vp);
3878 
3879         return;
3880 
3881 out:
3882         if (curthread->t_flag & T_WOULDBLOCK) {
3883                 curthread->t_flag &= ~T_WOULDBLOCK;
3884                 resp->status = NFS3ERR_JUKEBOX;
3885         } else
3886                 resp->status = puterrno3(error);
3887 out1:
3888         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3889             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3890             FSSTAT3res *, resp);
3891 
3892         if (vp != NULL)
3893                 VN_RELE(vp);
3894         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3895 }
3896 
3897 void *
3898 rfs3_fsstat_getfh(FSSTAT3args *args)
3899 {
3900 
3901         return (&args->fsroot);
3902 }
3903 
3904 /* ARGSUSED */
3905 void
3906 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3907     struct svc_req *req, cred_t *cr, bool_t ro)
3908 {
3909         vnode_t *vp;
3910         struct vattr *vap;
3911         struct vattr va;
3912         uint32_t xfer_size;
3913         ulong_t l = 0;
3914         int error;
3915 
3916         vp = nfs3_fhtovp(&args->fsroot, exi);
3917 
3918         DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3919             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3920             FSINFO3args *, args);
3921 
3922         if (vp == NULL) {
3923                 if (curthread->t_flag & T_WOULDBLOCK) {
3924                         curthread->t_flag &= ~T_WOULDBLOCK;
3925                         resp->status = NFS3ERR_JUKEBOX;
3926                 } else
3927                         resp->status = NFS3ERR_STALE;
3928                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3929                 goto out;
3930         }
3931 
3932         if (is_system_labeled()) {
3933                 bslabel_t *clabel = req->rq_label;
3934 
3935                 ASSERT(clabel != NULL);
3936                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3937                     "got client label from request(1)", struct svc_req *, req);
3938 
3939                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3940                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3941                             exi)) {
3942                                 resp->status = NFS3ERR_STALE;
3943                                 vattr_to_post_op_attr(NULL,
3944                                     &resp->resfail.obj_attributes);
3945                                 goto out;
3946                         }
3947                 }
3948         }
3949 
3950         va.va_mask = AT_ALL;
3951         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3952 
3953         resp->status = NFS3_OK;
3954         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3955         xfer_size = rfs3_tsize(req);
3956         resp->resok.rtmax = xfer_size;
3957         resp->resok.rtpref = xfer_size;
3958         resp->resok.rtmult = DEV_BSIZE;
3959         resp->resok.wtmax = xfer_size;
3960         resp->resok.wtpref = xfer_size;
3961         resp->resok.wtmult = DEV_BSIZE;
3962         resp->resok.dtpref = MAXBSIZE;
3963 
3964         /*
3965          * Large file spec: want maxfilesize based on limit of
3966          * underlying filesystem.  We can guess 2^31-1 if need be.
3967          */
3968         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3969         if (error) {
3970                 resp->status = puterrno3(error);
3971                 goto out;
3972         }
3973 
3974         /*
3975          * If the underlying file system does not support _PC_FILESIZEBITS,
3976          * return a reasonable default. Note that error code on VOP_PATHCONF
3977          * will be 0, even if the underlying file system does not support
3978          * _PC_FILESIZEBITS.
3979          */
3980         if (l == (ulong_t)-1) {
3981                 resp->resok.maxfilesize = MAXOFF32_T;
3982         } else {
3983                 if (l >= (sizeof (uint64_t) * 8))
3984                         resp->resok.maxfilesize = INT64_MAX;
3985                 else
3986                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3987         }
3988 
3989         resp->resok.time_delta.seconds = 0;
3990         resp->resok.time_delta.nseconds = 1000;
3991         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3992             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3993 
3994         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3995             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3996             FSINFO3res *, resp);
3997 
3998         VN_RELE(vp);
3999 
4000         return;
4001 
4002 out:
4003         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
4004             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
4005             FSINFO3res *, resp);
4006         if (vp != NULL)
4007                 VN_RELE(vp);
4008 }
4009 
4010 void *
4011 rfs3_fsinfo_getfh(FSINFO3args *args)
4012 {
4013         return (&args->fsroot);
4014 }
4015 
4016 /* ARGSUSED */
4017 void
4018 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4019     struct svc_req *req, cred_t *cr, bool_t ro)
4020 {
4021         int error;
4022         vnode_t *vp;
4023         struct vattr *vap;
4024         struct vattr va;
4025         ulong_t val;
4026 
4027         vap = NULL;
4028 
4029         vp = nfs3_fhtovp(&args->object, exi);
4030 
4031         DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4032             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4033             PATHCONF3args *, args);
4034 
4035         if (vp == NULL) {
4036                 error = ESTALE;
4037                 goto out;
4038         }
4039 
4040         if (is_system_labeled()) {
4041                 bslabel_t *clabel = req->rq_label;
4042 
4043                 ASSERT(clabel != NULL);
4044                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4045                     "got client label from request(1)", struct svc_req *, req);
4046 
4047                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4048                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4049                             exi)) {
4050                                 resp->status = NFS3ERR_ACCES;
4051                                 goto out1;
4052                         }
4053                 }
4054         }
4055 
4056         va.va_mask = AT_ALL;
4057         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4058 
4059         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4060         if (error)
4061                 goto out;
4062         resp->resok.info.link_max = (uint32)val;
4063 
4064         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4065         if (error)
4066                 goto out;
4067         resp->resok.info.name_max = (uint32)val;
4068 
4069         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4070         if (error)
4071                 goto out;
4072         if (val == 1)
4073                 resp->resok.info.no_trunc = TRUE;
4074         else
4075                 resp->resok.info.no_trunc = FALSE;
4076 
4077         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4078         if (error)
4079                 goto out;
4080         if (val == 1)
4081                 resp->resok.info.chown_restricted = TRUE;
4082         else
4083                 resp->resok.info.chown_restricted = FALSE;
4084 
4085         resp->status = NFS3_OK;
4086         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4087         resp->resok.info.case_insensitive = FALSE;
4088         resp->resok.info.case_preserving = TRUE;
4089         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4090             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4091             PATHCONF3res *, resp);
4092         VN_RELE(vp);
4093         return;
4094 
4095 out:
4096         if (curthread->t_flag & T_WOULDBLOCK) {
4097                 curthread->t_flag &= ~T_WOULDBLOCK;
4098                 resp->status = NFS3ERR_JUKEBOX;
4099         } else
4100                 resp->status = puterrno3(error);
4101 out1:
4102         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4103             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4104             PATHCONF3res *, resp);
4105         if (vp != NULL)
4106                 VN_RELE(vp);
4107         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4108 }
4109 
4110 void *
4111 rfs3_pathconf_getfh(PATHCONF3args *args)
4112 {
4113 
4114         return (&args->object);
4115 }
4116 
4117 void
4118 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4119     struct svc_req *req, cred_t *cr, bool_t ro)
4120 {
4121         nfs3_srv_t *ns;
4122         int error;
4123         vnode_t *vp;
4124         struct vattr *bvap;
4125         struct vattr bva;
4126         struct vattr *avap;
4127         struct vattr ava;
4128 
4129         bvap = NULL;
4130         avap = NULL;
4131 
4132         vp = nfs3_fhtovp(&args->file, exi);
4133 
4134         DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4135             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4136             COMMIT3args *, args);
4137 
4138         if (vp == NULL) {
4139                 error = ESTALE;
4140                 goto out;
4141         }
4142 
4143         ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
4144         ns = nfs3_get_srv();
4145         bva.va_mask = AT_ALL;
4146         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4147 
4148         /*
4149          * If we can't get the attributes, then we can't do the
4150          * right access checking.  So, we'll fail the request.
4151          */
4152         if (error)
4153                 goto out;
4154 
4155         bvap = &bva;
4156 
4157         if (rdonly(ro, vp)) {
4158                 resp->status = NFS3ERR_ROFS;
4159                 goto out1;
4160         }
4161 
4162         if (vp->v_type != VREG) {
4163                 resp->status = NFS3ERR_INVAL;
4164                 goto out1;
4165         }
4166 
4167         if (is_system_labeled()) {
4168                 bslabel_t *clabel = req->rq_label;
4169 
4170                 ASSERT(clabel != NULL);
4171                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4172                     "got client label from request(1)", struct svc_req *, req);
4173 
4174                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4175                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4176                             exi)) {
4177                                 resp->status = NFS3ERR_ACCES;
4178                                 goto out1;
4179                         }
4180                 }
4181         }
4182 
4183         if (crgetuid(cr) != bva.va_uid &&
4184             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4185                 goto out;
4186 
4187         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4188 
4189         ava.va_mask = AT_ALL;
4190         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4191 
4192         if (error)
4193                 goto out;
4194 
4195         resp->status = NFS3_OK;
4196         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4197         resp->resok.verf = ns->write3verf;
4198 
4199         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4200             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4201             COMMIT3res *, resp);
4202 
4203         VN_RELE(vp);
4204 
4205         return;
4206 
4207 out:
4208         if (curthread->t_flag & T_WOULDBLOCK) {
4209                 curthread->t_flag &= ~T_WOULDBLOCK;
4210                 resp->status = NFS3ERR_JUKEBOX;
4211         } else
4212                 resp->status = puterrno3(error);
4213 out1:
4214         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4215             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4216             COMMIT3res *, resp);
4217 
4218         if (vp != NULL)
4219                 VN_RELE(vp);
4220         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4221 }
4222 
4223 void *
4224 rfs3_commit_getfh(COMMIT3args *args)
4225 {
4226 
4227         return (&args->file);
4228 }
4229 
4230 static int
4231 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4232 {
4233 
4234         vap->va_mask = 0;
4235 
4236         if (sap->mode.set_it) {
4237                 vap->va_mode = (mode_t)sap->mode.mode;
4238                 vap->va_mask |= AT_MODE;
4239         }
4240         if (sap->uid.set_it) {
4241                 vap->va_uid = (uid_t)sap->uid.uid;
4242                 vap->va_mask |= AT_UID;
4243         }
4244         if (sap->gid.set_it) {
4245                 vap->va_gid = (gid_t)sap->gid.gid;
4246                 vap->va_mask |= AT_GID;
4247         }
4248         if (sap->size.set_it) {
4249                 if (sap->size.size > (size3)((u_longlong_t)-1))
4250                         return (EINVAL);
4251                 vap->va_size = sap->size.size;
4252                 vap->va_mask |= AT_SIZE;
4253         }
4254         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4255 #ifndef _LP64
4256                 /* check time validity */
4257                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4258                         return (EOVERFLOW);
4259 #endif
4260                 /*
4261                  * nfs protocol defines times as unsigned so don't extend sign,
4262                  * unless sysadmin set nfs_allow_preepoch_time.
4263                  */
4264                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4265                     sap->atime.atime.seconds);
4266                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4267                 vap->va_mask |= AT_ATIME;
4268         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4269                 gethrestime(&vap->va_atime);
4270                 vap->va_mask |= AT_ATIME;
4271         }
4272         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4273 #ifndef _LP64
4274                 /* check time validity */
4275                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4276                         return (EOVERFLOW);
4277 #endif
4278                 /*
4279                  * nfs protocol defines times as unsigned so don't extend sign,
4280                  * unless sysadmin set nfs_allow_preepoch_time.
4281                  */
4282                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4283                     sap->mtime.mtime.seconds);
4284                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4285                 vap->va_mask |= AT_MTIME;
4286         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4287                 gethrestime(&vap->va_mtime);
4288                 vap->va_mask |= AT_MTIME;
4289         }
4290 
4291         return (0);
4292 }
4293 
4294 static const ftype3 vt_to_nf3[] = {
4295         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4296 };
4297 
4298 static int
4299 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4300 {
4301 
4302         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4303         /* Return error if time or size overflow */
4304         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4305                 return (EOVERFLOW);
4306         }
4307         fap->type = vt_to_nf3[vap->va_type];
4308         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4309         fap->nlink = (uint32)vap->va_nlink;
4310         if (vap->va_uid == UID_NOBODY)
4311                 fap->uid = (uid3)NFS_UID_NOBODY;
4312         else
4313                 fap->uid = (uid3)vap->va_uid;
4314         if (vap->va_gid == GID_NOBODY)
4315                 fap->gid = (gid3)NFS_GID_NOBODY;
4316         else
4317                 fap->gid = (gid3)vap->va_gid;
4318         fap->size = (size3)vap->va_size;
4319         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4320         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4321         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4322         fap->fsid = (uint64)vap->va_fsid;
4323         fap->fileid = (fileid3)vap->va_nodeid;
4324         fap->atime.seconds = vap->va_atime.tv_sec;
4325         fap->atime.nseconds = vap->va_atime.tv_nsec;
4326         fap->mtime.seconds = vap->va_mtime.tv_sec;
4327         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4328         fap->ctime.seconds = vap->va_ctime.tv_sec;
4329         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4330         return (0);
4331 }
4332 
4333 static int
4334 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4335 {
4336 
4337         /* Return error if time or size overflow */
4338         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4339             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4340             NFS3_SIZE_OK(vap->va_size))) {
4341                 return (EOVERFLOW);
4342         }
4343         wccap->size = (size3)vap->va_size;
4344         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4345         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4346         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4347         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4348         return (0);
4349 }
4350 
4351 static void
4352 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4353 {
4354 
4355         /* don't return attrs if time overflow */
4356         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4357                 poap->attributes = TRUE;
4358         } else
4359                 poap->attributes = FALSE;
4360 }
4361 
4362 void
4363 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4364 {
4365 
4366         /* don't return attrs if time overflow */
4367         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4368                 poap->attributes = TRUE;
4369         } else
4370                 poap->attributes = FALSE;
4371 }
4372 
4373 static void
4374 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4375 {
4376         vattr_to_pre_op_attr(bvap, &wccp->before);
4377         vattr_to_post_op_attr(avap, &wccp->after);
4378 }
4379 
4380 static int
4381 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4382 {
4383         struct clist    *wcl;
4384         int             wlist_len;
4385         count3          count = rok->count;
4386 
4387         wcl = args->wlist;
4388         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4389                 return (FALSE);
4390 
4391         wcl = args->wlist;
4392         rok->wlist_len = wlist_len;
4393         rok->wlist = wcl;
4394         return (TRUE);
4395 }
4396 
4397 void
4398 rfs3_srv_zone_init(nfs_globals_t *ng)
4399 {
4400         nfs3_srv_t *ns;
4401         struct rfs3_verf_overlay {
4402                 uint_t id; /* a "unique" identifier */
4403                 int ts; /* a unique timestamp */
4404         } *verfp;
4405         timestruc_t now;
4406 
4407         ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4408 
4409         /*
4410          * The following algorithm attempts to find a unique verifier
4411          * to be used as the write verifier returned from the server
4412          * to the client.  It is important that this verifier change
4413          * whenever the server reboots.  Of secondary importance, it
4414          * is important for the verifier to be unique between two
4415          * different servers.
4416          *
4417          * Thus, an attempt is made to use the system hostid and the
4418          * current time in seconds when the nfssrv kernel module is
4419          * loaded.  It is assumed that an NFS server will not be able
4420          * to boot and then to reboot in less than a second.  If the
4421          * hostid has not been set, then the current high resolution
4422          * time is used.  This will ensure different verifiers each
4423          * time the server reboots and minimize the chances that two
4424          * different servers will have the same verifier.
4425          */
4426 
4427 #ifndef lint
4428         /*
4429          * We ASSERT that this constant logic expression is
4430          * always true because in the past, it wasn't.
4431          */
4432         ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4433 #endif
4434 
4435         gethrestime(&now);
4436         verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4437         verfp->ts = (int)now.tv_sec;
4438         verfp->id = zone_get_hostid(NULL);
4439 
4440         if (verfp->id == 0)
4441                 verfp->id = (uint_t)now.tv_nsec;
4442 
4443         ng->nfs3_srv = ns;
4444 }
4445 
4446 void
4447 rfs3_srv_zone_fini(nfs_globals_t *ng)
4448 {
4449         nfs3_srv_t *ns = ng->nfs3_srv;
4450 
4451         ng->nfs3_srv = NULL;
4452 
4453         kmem_free(ns, sizeof (*ns));
4454 }
4455 
4456 void
4457 rfs3_srvrinit(void)
4458 {
4459         nfs3_srv_caller_id = fs_new_caller_id();
4460 }
4461 
/*
 * Counterpart of rfs3_srvrinit().  Deliberately empty.
 */
void
rfs3_srvrfini(void)
{
        /* No teardown required */
}