1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2018 Nexenta Systems, Inc.
  24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 
  28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /* All Rights Reserved */
  30 
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/buf.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/uio.h>
  40 #include <sys/errno.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/statvfs.h>
  43 #include <sys/kmem.h>
  44 #include <sys/dirent.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/systeminfo.h>
  48 #include <sys/flock.h>
  49 #include <sys/nbmlock.h>
  50 #include <sys/policy.h>
  51 #include <sys/sdt.h>
  52 
  53 #include <rpc/types.h>
  54 #include <rpc/auth.h>
  55 #include <rpc/svc.h>
  56 #include <rpc/rpc_rdma.h>
  57 
  58 #include <nfs/nfs.h>
  59 #include <nfs/export.h>
  60 #include <nfs/nfs_cmd.h>
  61 
  62 #include <sys/strsubr.h>
  63 #include <sys/tsol/label.h>
  64 #include <sys/tsol/tndb.h>
  65 
  66 #include <sys/zone.h>
  67 
  68 #include <inet/ip.h>
  69 #include <inet/ip6.h>
  70 
  71 /*
  72  * Zone global variables of NFSv3 server
  73  */
  74 typedef struct nfs3_srv {
  75         writeverf3      write3verf;
  76 } nfs3_srv_t;
  77 
  78 /*
  79  * These are the interface routines for the server side of the
  80  * Network File System.  See the NFS version 3 protocol specification
  81  * for a description of this interface.
  82  */
  83 
  84 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  85 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  86 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  87 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  88 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  89 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  90 
  91 extern int nfs_loaned_buffers;
  92 
  93 u_longlong_t nfs3_srv_caller_id;
  94 
  95 static nfs3_srv_t *
  96 nfs3_get_srv(void)
  97 {
  98         nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
  99         nfs3_srv_t *srv = ng->nfs3_srv;
 100         ASSERT(srv != NULL);
 101         return (srv);
 102 }
 103 
 104 /* ARGSUSED */
 105 void
 106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
 107     struct svc_req *req, cred_t *cr, bool_t ro)
 108 {
 109         int error;
 110         vnode_t *vp;
 111         struct vattr va;
 112 
 113         vp = nfs3_fhtovp(&args->object, exi);
 114 
 115         DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
 116             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 117             GETATTR3args *, args);
 118 
 119         if (vp == NULL) {
 120                 error = ESTALE;
 121                 goto out;
 122         }
 123 
 124         va.va_mask = AT_ALL;
 125         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 126 
 127         if (!error) {
 128                 /* Lie about the object type for a referral */
 129                 if (vn_is_nfs_reparse(vp, cr))
 130                         va.va_type = VLNK;
 131 
 132                 /* overflow error if time or size is out of range */
 133                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 134                 if (error)
 135                         goto out;
 136                 resp->status = NFS3_OK;
 137 
 138                 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 139                     cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 140                     GETATTR3res *, resp);
 141 
 142                 VN_RELE(vp);
 143 
 144                 return;
 145         }
 146 
 147 out:
 148         if (curthread->t_flag & T_WOULDBLOCK) {
 149                 curthread->t_flag &= ~T_WOULDBLOCK;
 150                 resp->status = NFS3ERR_JUKEBOX;
 151         } else
 152                 resp->status = puterrno3(error);
 153 
 154         DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 155             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 156             GETATTR3res *, resp);
 157 
 158         if (vp != NULL)
 159                 VN_RELE(vp);
 160 }
 161 
 162 void *
 163 rfs3_getattr_getfh(GETATTR3args *args)
 164 {
 165 
 166         return (&args->object);
 167 }
 168 
 169 void
 170 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 171     struct svc_req *req, cred_t *cr, bool_t ro)
 172 {
 173         int error;
 174         vnode_t *vp;
 175         struct vattr *bvap;
 176         struct vattr bva;
 177         struct vattr *avap;
 178         struct vattr ava;
 179         int flag;
 180         int in_crit = 0;
 181         struct flock64 bf;
 182         caller_context_t ct;
 183 
 184         bvap = NULL;
 185         avap = NULL;
 186 
 187         vp = nfs3_fhtovp(&args->object, exi);
 188 
 189         DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
 190             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 191             SETATTR3args *, args);
 192 
 193         if (vp == NULL) {
 194                 error = ESTALE;
 195                 goto out;
 196         }
 197 
 198         error = sattr3_to_vattr(&args->new_attributes, &ava);
 199         if (error)
 200                 goto out;
 201 
 202         if (is_system_labeled()) {
 203                 bslabel_t *clabel = req->rq_label;
 204 
 205                 ASSERT(clabel != NULL);
 206                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 207                     "got client label from request(1)", struct svc_req *, req);
 208 
 209                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 210                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 211                             exi)) {
 212                                 resp->status = NFS3ERR_ACCES;
 213                                 goto out1;
 214                         }
 215                 }
 216         }
 217 
 218         /*
 219          * We need to specially handle size changes because of
 220          * possible conflicting NBMAND locks. Get into critical
 221          * region before VOP_GETATTR, so the size attribute is
 222          * valid when checking conflicts.
 223          *
 224          * Also, check to see if the v4 side of the server has
 225          * delegated this file.  If so, then we return JUKEBOX to
 226          * allow the client to retrasmit its request.
 227          */
 228         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 229                 if (nbl_need_check(vp)) {
 230                         nbl_start_crit(vp, RW_READER);
 231                         in_crit = 1;
 232                 }
 233         }
 234 
 235         bva.va_mask = AT_ALL;
 236         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 237 
 238         /*
 239          * If we can't get the attributes, then we can't do the
 240          * right access checking.  So, we'll fail the request.
 241          */
 242         if (error)
 243                 goto out;
 244 
 245         bvap = &bva;
 246 
 247         if (rdonly(ro, vp)) {
 248                 resp->status = NFS3ERR_ROFS;
 249                 goto out1;
 250         }
 251 
 252         if (args->guard.check &&
 253             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 254             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 255                 resp->status = NFS3ERR_NOT_SYNC;
 256                 goto out1;
 257         }
 258 
 259         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 260                 flag = ATTR_UTIME;
 261         else
 262                 flag = 0;
 263 
 264         /*
 265          * If the filesystem is exported with nosuid, then mask off
 266          * the setuid and setgid bits.
 267          */
 268         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 269             (exi->exi_export.ex_flags & EX_NOSUID))
 270                 ava.va_mode &= ~(VSUID | VSGID);
 271 
 272         ct.cc_sysid = 0;
 273         ct.cc_pid = 0;
 274         ct.cc_caller_id = nfs3_srv_caller_id;
 275         ct.cc_flags = CC_DONTBLOCK;
 276 
 277         /*
 278          * We need to specially handle size changes because it is
 279          * possible for the client to create a file with modes
 280          * which indicate read-only, but with the file opened for
 281          * writing.  If the client then tries to set the size of
 282          * the file, then the normal access checking done in
 283          * VOP_SETATTR would prevent the client from doing so,
 284          * although it should be legal for it to do so.  To get
 285          * around this, we do the access checking for ourselves
 286          * and then use VOP_SPACE which doesn't do the access
 287          * checking which VOP_SETATTR does. VOP_SPACE can only
 288          * operate on VREG files, let VOP_SETATTR handle the other
 289          * extremely rare cases.
 290          * Also the client should not be allowed to change the
 291          * size of the file if there is a conflicting non-blocking
 292          * mandatory lock in the region the change.
 293          */
 294         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 295                 if (in_crit) {
 296                         u_offset_t offset;
 297                         ssize_t length;
 298 
 299                         if (ava.va_size < bva.va_size) {
 300                                 offset = ava.va_size;
 301                                 length = bva.va_size - ava.va_size;
 302                         } else {
 303                                 offset = bva.va_size;
 304                                 length = ava.va_size - bva.va_size;
 305                         }
 306                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 307                             NULL)) {
 308                                 error = EACCES;
 309                                 goto out;
 310                         }
 311                 }
 312 
 313                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 314                         ava.va_mask &= ~AT_SIZE;
 315                         bf.l_type = F_WRLCK;
 316                         bf.l_whence = 0;
 317                         bf.l_start = (off64_t)ava.va_size;
 318                         bf.l_len = 0;
 319                         bf.l_sysid = 0;
 320                         bf.l_pid = 0;
 321                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 322                             (offset_t)ava.va_size, cr, &ct);
 323                 }
 324         }
 325 
 326         if (!error && ava.va_mask)
 327                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 328 
 329         /* check if a monitor detected a delegation conflict */
 330         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 331                 resp->status = NFS3ERR_JUKEBOX;
 332                 goto out1;
 333         }
 334 
 335         ava.va_mask = AT_ALL;
 336         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 337 
 338         /*
 339          * Force modified metadata out to stable storage.
 340          */
 341         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 342 
 343         if (error)
 344                 goto out;
 345 
 346         if (in_crit)
 347                 nbl_end_crit(vp);
 348 
 349         resp->status = NFS3_OK;
 350         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 351 
 352         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 353             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 354             SETATTR3res *, resp);
 355 
 356         VN_RELE(vp);
 357 
 358         return;
 359 
 360 out:
 361         if (curthread->t_flag & T_WOULDBLOCK) {
 362                 curthread->t_flag &= ~T_WOULDBLOCK;
 363                 resp->status = NFS3ERR_JUKEBOX;
 364         } else
 365                 resp->status = puterrno3(error);
 366 out1:
 367         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 368             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 369             SETATTR3res *, resp);
 370 
 371         if (vp != NULL) {
 372                 if (in_crit)
 373                         nbl_end_crit(vp);
 374                 VN_RELE(vp);
 375         }
 376         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 377 }
 378 
 379 void *
 380 rfs3_setattr_getfh(SETATTR3args *args)
 381 {
 382 
 383         return (&args->object);
 384 }
 385 
 386 /* ARGSUSED */
 387 void
 388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 389     struct svc_req *req, cred_t *cr, bool_t ro)
 390 {
 391         int error;
 392         vnode_t *vp;
 393         vnode_t *dvp;
 394         struct vattr *vap;
 395         struct vattr va;
 396         struct vattr *dvap;
 397         struct vattr dva;
 398         nfs_fh3 *fhp;
 399         struct sec_ol sec = {0, 0};
 400         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 401         struct sockaddr *ca;
 402         char *name = NULL;
 403 
 404         dvap = NULL;
 405 
 406         if (exi != NULL)
 407                 exi_hold(exi);
 408 
 409         /*
 410          * Allow lookups from the root - the default
 411          * location of the public filehandle.
 412          */
 413         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 414                 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
 415                 dvp = ZONE_ROOTVP();
 416                 VN_HOLD(dvp);
 417 
 418                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 419                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 420                     LOOKUP3args *, args);
 421         } else {
 422                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 423 
 424                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 425                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 426                     LOOKUP3args *, args);
 427 
 428                 if (dvp == NULL) {
 429                         error = ESTALE;
 430                         goto out;
 431                 }
 432         }
 433 
 434         dva.va_mask = AT_ALL;
 435         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 436 
 437         if (args->what.name == nfs3nametoolong) {
 438                 resp->status = NFS3ERR_NAMETOOLONG;
 439                 goto out1;
 440         }
 441 
 442         if (args->what.name == NULL || *(args->what.name) == '\0') {
 443                 resp->status = NFS3ERR_ACCES;
 444                 goto out1;
 445         }
 446 
 447         fhp = &args->what.dir;
 448         ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL */
 449         if (strcmp(args->what.name, "..") == 0 &&
 450             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 451                 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 452                     ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
 453                         /*
 454                          * special case for ".." and 'nohide'exported root
 455                          */
 456                         if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 457                                 resp->status = NFS3ERR_ACCES;
 458                                 goto out1;
 459                         }
 460                 } else {
 461                         resp->status = NFS3ERR_NOENT;
 462                         goto out1;
 463                 }
 464         }
 465 
 466         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 467         name = nfscmd_convname(ca, exi, args->what.name,
 468             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 469 
 470         if (name == NULL) {
 471                 resp->status = NFS3ERR_ACCES;
 472                 goto out1;
 473         }
 474 
 475         /*
 476          * If the public filehandle is used then allow
 477          * a multi-component lookup
 478          */
 479         if (PUBLIC_FH3(&args->what.dir)) {
 480                 publicfh_flag = TRUE;
 481 
 482                 exi_rele(exi);
 483                 exi = NULL;
 484 
 485                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 486                     &exi, &sec);
 487 
 488                 /*
 489                  * Since WebNFS may bypass MOUNT, we need to ensure this
 490                  * request didn't come from an unlabeled admin_low client.
 491                  */
 492                 if (is_system_labeled() && error == 0) {
 493                         int             addr_type;
 494                         void            *ipaddr;
 495                         tsol_tpc_t      *tp;
 496 
 497                         if (ca->sa_family == AF_INET) {
 498                                 addr_type = IPV4_VERSION;
 499                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 500                         } else if (ca->sa_family == AF_INET6) {
 501                                 addr_type = IPV6_VERSION;
 502                                 ipaddr = &((struct sockaddr_in6 *)
 503                                     ca)->sin6_addr;
 504                         }
 505                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 506                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 507                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 508                             SUN_CIPSO) {
 509                                 VN_RELE(vp);
 510                                 error = EACCES;
 511                         }
 512                         if (tp != NULL)
 513                                 TPC_RELE(tp);
 514                 }
 515         } else {
 516                 error = VOP_LOOKUP(dvp, name, &vp,
 517                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 518         }
 519 
 520         if (name != args->what.name)
 521                 kmem_free(name, MAXPATHLEN + 1);
 522 
 523         if (error == 0 && vn_ismntpt(vp)) {
 524                 error = rfs_cross_mnt(&vp, &exi);
 525                 if (error)
 526                         VN_RELE(vp);
 527         }
 528 
 529         if (is_system_labeled() && error == 0) {
 530                 bslabel_t *clabel = req->rq_label;
 531 
 532                 ASSERT(clabel != NULL);
 533                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 534                     "got client label from request(1)", struct svc_req *, req);
 535 
 536                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 537                         if (!do_rfs_label_check(clabel, dvp,
 538                             DOMINANCE_CHECK, exi)) {
 539                                 VN_RELE(vp);
 540                                 error = EACCES;
 541                         }
 542                 }
 543         }
 544 
 545         dva.va_mask = AT_ALL;
 546         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 547 
 548         if (error)
 549                 goto out;
 550 
 551         if (sec.sec_flags & SEC_QUERY) {
 552                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 553         } else {
 554                 error = makefh3(&resp->resok.object, vp, exi);
 555                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 556                         auth_weak = TRUE;
 557         }
 558 
 559         if (error) {
 560                 VN_RELE(vp);
 561                 goto out;
 562         }
 563 
 564         va.va_mask = AT_ALL;
 565         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 566 
 567         VN_RELE(vp);
 568 
 569         resp->status = NFS3_OK;
 570         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 571         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 572 
 573         /*
 574          * If it's public fh, no 0x81, and client's flavor is
 575          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 576          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 577          */
 578         if (auth_weak)
 579                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 580 
 581         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 582             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 583             LOOKUP3res *, resp);
 584         VN_RELE(dvp);
 585         exi_rele(exi);
 586 
 587         return;
 588 
 589 out:
 590         if (curthread->t_flag & T_WOULDBLOCK) {
 591                 curthread->t_flag &= ~T_WOULDBLOCK;
 592                 resp->status = NFS3ERR_JUKEBOX;
 593         } else
 594                 resp->status = puterrno3(error);
 595 out1:
 596         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 597             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 598             LOOKUP3res *, resp);
 599 
 600         if (exi != NULL)
 601                 exi_rele(exi);
 602 
 603         if (dvp != NULL)
 604                 VN_RELE(dvp);
 605         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 606 
 607 }
 608 
 609 void *
 610 rfs3_lookup_getfh(LOOKUP3args *args)
 611 {
 612 
 613         return (&args->what.dir);
 614 }
 615 
 616 /* ARGSUSED */
 617 void
 618 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 619     struct svc_req *req, cred_t *cr, bool_t ro)
 620 {
 621         int error;
 622         vnode_t *vp;
 623         struct vattr *vap;
 624         struct vattr va;
 625         int checkwriteperm;
 626         boolean_t dominant_label = B_FALSE;
 627         boolean_t equal_label = B_FALSE;
 628         boolean_t admin_low_client;
 629 
 630         vap = NULL;
 631 
 632         vp = nfs3_fhtovp(&args->object, exi);
 633 
 634         DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
 635             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 636             ACCESS3args *, args);
 637 
 638         if (vp == NULL) {
 639                 error = ESTALE;
 640                 goto out;
 641         }
 642 
 643         /*
 644          * If the file system is exported read only, it is not appropriate
 645          * to check write permissions for regular files and directories.
 646          * Special files are interpreted by the client, so the underlying
 647          * permissions are sent back to the client for interpretation.
 648          */
 649         if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
 650                 checkwriteperm = 0;
 651         else
 652                 checkwriteperm = 1;
 653 
 654         /*
 655          * We need the mode so that we can correctly determine access
 656          * permissions relative to a mandatory lock file.  Access to
 657          * mandatory lock files is denied on the server, so it might
 658          * as well be reflected to the server during the open.
 659          */
 660         va.va_mask = AT_MODE;
 661         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 662         if (error)
 663                 goto out;
 664 
 665         vap = &va;
 666 
 667         resp->resok.access = 0;
 668 
 669         if (is_system_labeled()) {
 670                 bslabel_t *clabel = req->rq_label;
 671 
 672                 ASSERT(clabel != NULL);
 673                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 674                     "got client label from request(1)", struct svc_req *, req);
 675 
 676                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 677                         if ((equal_label = do_rfs_label_check(clabel, vp,
 678                             EQUALITY_CHECK, exi)) == B_FALSE) {
 679                                 dominant_label = do_rfs_label_check(clabel,
 680                                     vp, DOMINANCE_CHECK, exi);
 681                         } else
 682                                 dominant_label = B_TRUE;
 683                         admin_low_client = B_FALSE;
 684                 } else
 685                         admin_low_client = B_TRUE;
 686         }
 687 
 688         if (args->access & ACCESS3_READ) {
 689                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 690                 if (error) {
 691                         if (curthread->t_flag & T_WOULDBLOCK)
 692                                 goto out;
 693                 } else if (!MANDLOCK(vp, va.va_mode) &&
 694                     (!is_system_labeled() || admin_low_client ||
 695                     dominant_label))
 696                         resp->resok.access |= ACCESS3_READ;
 697         }
 698         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 699                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 700                 if (error) {
 701                         if (curthread->t_flag & T_WOULDBLOCK)
 702                                 goto out;
 703                 } else if (!is_system_labeled() || admin_low_client ||
 704                     dominant_label)
 705                         resp->resok.access |= ACCESS3_LOOKUP;
 706         }
 707         if (checkwriteperm &&
 708             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 709                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 710                 if (error) {
 711                         if (curthread->t_flag & T_WOULDBLOCK)
 712                                 goto out;
 713                 } else if (!MANDLOCK(vp, va.va_mode) &&
 714                     (!is_system_labeled() || admin_low_client || equal_label)) {
 715                         resp->resok.access |=
 716                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 717                 }
 718         }
 719         if (checkwriteperm &&
 720             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 721                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 722                 if (error) {
 723                         if (curthread->t_flag & T_WOULDBLOCK)
 724                                 goto out;
 725                 } else if (!is_system_labeled() || admin_low_client ||
 726                     equal_label)
 727                         resp->resok.access |= ACCESS3_DELETE;
 728         }
 729         if (args->access & ACCESS3_EXECUTE) {
 730                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 731                 if (error) {
 732                         if (curthread->t_flag & T_WOULDBLOCK)
 733                                 goto out;
 734                 } else if (!MANDLOCK(vp, va.va_mode) &&
 735                     (!is_system_labeled() || admin_low_client ||
 736                     dominant_label))
 737                         resp->resok.access |= ACCESS3_EXECUTE;
 738         }
 739 
 740         va.va_mask = AT_ALL;
 741         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 742 
 743         resp->status = NFS3_OK;
 744         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 745 
 746         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 747             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 748             ACCESS3res *, resp);
 749 
 750         VN_RELE(vp);
 751 
 752         return;
 753 
 754 out:
 755         if (curthread->t_flag & T_WOULDBLOCK) {
 756                 curthread->t_flag &= ~T_WOULDBLOCK;
 757                 resp->status = NFS3ERR_JUKEBOX;
 758         } else
 759                 resp->status = puterrno3(error);
 760         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 761             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 762             ACCESS3res *, resp);
 763         if (vp != NULL)
 764                 VN_RELE(vp);
 765         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 766 }
 767 
 768 void *
 769 rfs3_access_getfh(ACCESS3args *args)
 770 {
 771 
 772         return (&args->object);
 773 }
 774 
 775 /* ARGSUSED */
 776 void
 777 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 778     struct svc_req *req, cred_t *cr, bool_t ro)
 779 {
 780         int error;
 781         vnode_t *vp;
 782         struct vattr *vap;
 783         struct vattr va;
 784         struct iovec iov;
 785         struct uio uio;
 786         char *data;
 787         struct sockaddr *ca;
 788         char *name = NULL;
 789         int is_referral = 0;
 790 
 791         vap = NULL;
 792 
 793         vp = nfs3_fhtovp(&args->symlink, exi);
 794 
 795         DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
 796             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 797             READLINK3args *, args);
 798 
 799         if (vp == NULL) {
 800                 error = ESTALE;
 801                 goto out;
 802         }
 803 
 804         va.va_mask = AT_ALL;
 805         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 806         if (error)
 807                 goto out;
 808 
 809         vap = &va;
 810 
 811         /* We lied about the object type for a referral */
 812         if (vn_is_nfs_reparse(vp, cr))
 813                 is_referral = 1;
 814 
 815         if (vp->v_type != VLNK && !is_referral) {
 816                 resp->status = NFS3ERR_INVAL;
 817                 goto out1;
 818         }
 819 
 820         if (MANDLOCK(vp, va.va_mode)) {
 821                 resp->status = NFS3ERR_ACCES;
 822                 goto out1;
 823         }
 824 
 825         if (is_system_labeled()) {
 826                 bslabel_t *clabel = req->rq_label;
 827 
 828                 ASSERT(clabel != NULL);
 829                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 830                     "got client label from request(1)", struct svc_req *, req);
 831 
 832                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 833                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 834                             exi)) {
 835                                 resp->status = NFS3ERR_ACCES;
 836                                 goto out1;
 837                         }
 838                 }
 839         }
 840 
 841         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 842 
 843         if (is_referral) {
 844                 char *s;
 845                 size_t strsz;
 846 
 847                 /* Get an artificial symlink based on a referral */
 848                 s = build_symlink(vp, cr, &strsz);
 849                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 850                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 851                     vnode_t *, vp, char *, s);
 852                 if (s == NULL)
 853                         error = EINVAL;
 854                 else {
 855                         error = 0;
 856                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 857                         kmem_free(s, strsz);
 858                 }
 859 
 860         } else {
 861 
 862                 iov.iov_base = data;
 863                 iov.iov_len = MAXPATHLEN;
 864                 uio.uio_iov = &iov;
 865                 uio.uio_iovcnt = 1;
 866                 uio.uio_segflg = UIO_SYSSPACE;
 867                 uio.uio_extflg = UIO_COPY_CACHED;
 868                 uio.uio_loffset = 0;
 869                 uio.uio_resid = MAXPATHLEN;
 870 
 871                 error = VOP_READLINK(vp, &uio, cr, NULL);
 872 
 873                 if (!error)
 874                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 875         }
 876 
 877         va.va_mask = AT_ALL;
 878         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 879 
 880         /* Lie about object type again just to be consistent */
 881         if (is_referral && vap != NULL)
 882                 vap->va_type = VLNK;
 883 
 884 #if 0 /* notyet */
 885         /*
 886          * Don't do this.  It causes local disk writes when just
 887          * reading the file and the overhead is deemed larger
 888          * than the benefit.
 889          */
 890         /*
 891          * Force modified metadata out to stable storage.
 892          */
 893         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 894 #endif
 895 
 896         if (error) {
 897                 kmem_free(data, MAXPATHLEN + 1);
 898                 goto out;
 899         }
 900 
 901         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 902         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 903             MAXPATHLEN + 1);
 904 
 905         if (name == NULL) {
 906                 /*
 907                  * Even though the conversion failed, we return
 908                  * something. We just don't translate it.
 909                  */
 910                 name = data;
 911         }
 912 
 913         resp->status = NFS3_OK;
 914         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 915         resp->resok.data = name;
 916 
 917         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 918             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 919             READLINK3res *, resp);
 920         VN_RELE(vp);
 921 
 922         if (name != data)
 923                 kmem_free(data, MAXPATHLEN + 1);
 924 
 925         return;
 926 
 927 out:
 928         if (curthread->t_flag & T_WOULDBLOCK) {
 929                 curthread->t_flag &= ~T_WOULDBLOCK;
 930                 resp->status = NFS3ERR_JUKEBOX;
 931         } else
 932                 resp->status = puterrno3(error);
 933 out1:
 934         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 935             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 936             READLINK3res *, resp);
 937         if (vp != NULL)
 938                 VN_RELE(vp);
 939         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 940 }
 941 
 942 void *
 943 rfs3_readlink_getfh(READLINK3args *args)
 944 {
 945 
 946         return (&args->symlink);
 947 }
 948 
 949 void
 950 rfs3_readlink_free(READLINK3res *resp)
 951 {
 952 
 953         if (resp->status == NFS3_OK)
 954                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 955 }
 956 
 957 /*
 958  * Server routine to handle read
 959  * May handle RDMA data as well as mblks
 960  */
 961 /* ARGSUSED */
 962 void
 963 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 964     struct svc_req *req, cred_t *cr, bool_t ro)
 965 {
 966         int error;
 967         vnode_t *vp;
 968         struct vattr *vap;
 969         struct vattr va;
 970         struct iovec iov, *iovp = NULL;
 971         int iovcnt;
 972         struct uio uio;
 973         u_offset_t offset;
 974         mblk_t *mp = NULL;
 975         int in_crit = 0;
 976         int need_rwunlock = 0;
 977         caller_context_t ct;
 978         int rdma_used = 0;
 979         int loaned_buffers;
 980         struct uio *uiop;
 981 
 982         vap = NULL;
 983 
 984         vp = nfs3_fhtovp(&args->file, exi);
 985 
 986         DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
 987             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 988             READ3args *, args);
 989 
 990 
 991         if (vp == NULL) {
 992                 error = ESTALE;
 993                 goto out;
 994         }
 995 
 996         if (args->wlist) {
 997                 if (args->count > clist_len(args->wlist)) {
 998                         error = EINVAL;
 999                         goto out;
1000                 }
1001                 rdma_used = 1;
1002         }
1003 
1004         /* use loaned buffers for TCP */
1005         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1006 
1007         if (is_system_labeled()) {
1008                 bslabel_t *clabel = req->rq_label;
1009 
1010                 ASSERT(clabel != NULL);
1011                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1012                     "got client label from request(1)", struct svc_req *, req);
1013 
1014                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1015                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1016                             exi)) {
1017                                 resp->status = NFS3ERR_ACCES;
1018                                 goto out1;
1019                         }
1020                 }
1021         }
1022 
1023         ct.cc_sysid = 0;
1024         ct.cc_pid = 0;
1025         ct.cc_caller_id = nfs3_srv_caller_id;
1026         ct.cc_flags = CC_DONTBLOCK;
1027 
1028         /*
1029          * Enter the critical region before calling VOP_RWLOCK
1030          * to avoid a deadlock with write requests.
1031          */
1032         if (nbl_need_check(vp)) {
1033                 nbl_start_crit(vp, RW_READER);
1034                 in_crit = 1;
1035                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1036                     NULL)) {
1037                         error = EACCES;
1038                         goto out;
1039                 }
1040         }
1041 
1042         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1043 
1044         /* check if a monitor detected a delegation conflict */
1045         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1046                 resp->status = NFS3ERR_JUKEBOX;
1047                 goto out1;
1048         }
1049 
1050         need_rwunlock = 1;
1051 
1052         va.va_mask = AT_ALL;
1053         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1054 
1055         /*
1056          * If we can't get the attributes, then we can't do the
1057          * right access checking.  So, we'll fail the request.
1058          */
1059         if (error)
1060                 goto out;
1061 
1062         vap = &va;
1063 
1064         if (vp->v_type != VREG) {
1065                 resp->status = NFS3ERR_INVAL;
1066                 goto out1;
1067         }
1068 
1069         if (crgetuid(cr) != va.va_uid) {
1070                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1071                 if (error) {
1072                         if (curthread->t_flag & T_WOULDBLOCK)
1073                                 goto out;
1074                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1075                         if (error)
1076                                 goto out;
1077                 }
1078         }
1079 
1080         if (MANDLOCK(vp, va.va_mode)) {
1081                 resp->status = NFS3ERR_ACCES;
1082                 goto out1;
1083         }
1084 
1085         offset = args->offset;
1086         if (offset >= va.va_size) {
1087                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1088                 if (in_crit)
1089                         nbl_end_crit(vp);
1090                 resp->status = NFS3_OK;
1091                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1092                 resp->resok.count = 0;
1093                 resp->resok.eof = TRUE;
1094                 resp->resok.data.data_len = 0;
1095                 resp->resok.data.data_val = NULL;
1096                 resp->resok.data.mp = NULL;
1097                 /* RDMA */
1098                 resp->resok.wlist = args->wlist;
1099                 resp->resok.wlist_len = resp->resok.count;
1100                 if (resp->resok.wlist)
1101                         clist_zero_len(resp->resok.wlist);
1102                 goto done;
1103         }
1104 
1105         if (args->count == 0) {
1106                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1107                 if (in_crit)
1108                         nbl_end_crit(vp);
1109                 resp->status = NFS3_OK;
1110                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1111                 resp->resok.count = 0;
1112                 resp->resok.eof = FALSE;
1113                 resp->resok.data.data_len = 0;
1114                 resp->resok.data.data_val = NULL;
1115                 resp->resok.data.mp = NULL;
1116                 /* RDMA */
1117                 resp->resok.wlist = args->wlist;
1118                 resp->resok.wlist_len = resp->resok.count;
1119                 if (resp->resok.wlist)
1120                         clist_zero_len(resp->resok.wlist);
1121                 goto done;
1122         }
1123 
1124         /*
1125          * do not allocate memory more the max. allowed
1126          * transfer size
1127          */
1128         if (args->count > rfs3_tsize(req))
1129                 args->count = rfs3_tsize(req);
1130 
1131         if (loaned_buffers) {
1132                 uiop = (uio_t *)rfs_setup_xuio(vp);
1133                 ASSERT(uiop != NULL);
1134                 uiop->uio_segflg = UIO_SYSSPACE;
1135                 uiop->uio_loffset = args->offset;
1136                 uiop->uio_resid = args->count;
1137 
1138                 /* Jump to do the read if successful */
1139                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1140                         /*
1141                          * Need to hold the vnode until after VOP_RETZCBUF()
1142                          * is called.
1143                          */
1144                         VN_HOLD(vp);
1145                         goto doio_read;
1146                 }
1147 
1148                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1149                     uiop->uio_loffset, int, uiop->uio_resid);
1150 
1151                 uiop->uio_extflg = 0;
1152                 /* failure to setup for zero copy */
1153                 rfs_free_xuio((void *)uiop);
1154                 loaned_buffers = 0;
1155         }
1156 
1157         /*
1158          * If returning data via RDMA Write, then grab the chunk list.
1159          * If we aren't returning READ data w/RDMA_WRITE, then grab
1160          * a mblk.
1161          */
1162         if (rdma_used) {
1163                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1164                 uio.uio_iov = &iov;
1165                 uio.uio_iovcnt = 1;
1166         } else {
1167                 /*
1168                  * mp will contain the data to be sent out in the read reply.
1169                  * For UDP, this will be freed after the reply has been sent
1170                  * out by the driver.  For TCP, it will be freed after the last
1171                  * segment associated with the reply has been ACKed by the
1172                  * client.
1173                  */
1174                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1175                 uio.uio_iov = iovp;
1176                 uio.uio_iovcnt = iovcnt;
1177         }
1178 
1179         uio.uio_segflg = UIO_SYSSPACE;
1180         uio.uio_extflg = UIO_COPY_CACHED;
1181         uio.uio_loffset = args->offset;
1182         uio.uio_resid = args->count;
1183         uiop = &uio;
1184 
1185 doio_read:
1186         error = VOP_READ(vp, uiop, 0, cr, &ct);
1187 
1188         if (error) {
1189                 if (mp)
1190                         freemsg(mp);
1191                 /* check if a monitor detected a delegation conflict */
1192                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1193                         resp->status = NFS3ERR_JUKEBOX;
1194                         goto out1;
1195                 }
1196                 goto out;
1197         }
1198 
1199         /* make mblk using zc buffers */
1200         if (loaned_buffers) {
1201                 mp = uio_to_mblk(uiop);
1202                 ASSERT(mp != NULL);
1203         }
1204 
1205         va.va_mask = AT_ALL;
1206         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1207 
1208         if (error)
1209                 vap = NULL;
1210         else
1211                 vap = &va;
1212 
1213         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1214 
1215         if (in_crit)
1216                 nbl_end_crit(vp);
1217 
1218         resp->status = NFS3_OK;
1219         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1220         resp->resok.count = args->count - uiop->uio_resid;
1221         if (!error && offset + resp->resok.count == va.va_size)
1222                 resp->resok.eof = TRUE;
1223         else
1224                 resp->resok.eof = FALSE;
1225         resp->resok.data.data_len = resp->resok.count;
1226 
1227         if (mp)
1228                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1229 
1230         resp->resok.data.mp = mp;
1231         resp->resok.size = (uint_t)args->count;
1232 
1233         if (rdma_used) {
1234                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1235                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1236                         resp->status = NFS3ERR_INVAL;
1237                 }
1238         } else {
1239                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1240                 (resp->resok).wlist = NULL;
1241         }
1242 
1243 done:
1244         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1245             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1246             READ3res *, resp);
1247 
1248         VN_RELE(vp);
1249 
1250         if (iovp != NULL)
1251                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1252 
1253         return;
1254 
1255 out:
1256         if (curthread->t_flag & T_WOULDBLOCK) {
1257                 curthread->t_flag &= ~T_WOULDBLOCK;
1258                 resp->status = NFS3ERR_JUKEBOX;
1259         } else
1260                 resp->status = puterrno3(error);
1261 out1:
1262         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1263             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1264             READ3res *, resp);
1265 
1266         if (vp != NULL) {
1267                 if (need_rwunlock)
1268                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1269                 if (in_crit)
1270                         nbl_end_crit(vp);
1271                 VN_RELE(vp);
1272         }
1273         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1274 
1275         if (iovp != NULL)
1276                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1277 }
1278 
1279 void
1280 rfs3_read_free(READ3res *resp)
1281 {
1282         mblk_t *mp;
1283 
1284         if (resp->status == NFS3_OK) {
1285                 mp = resp->resok.data.mp;
1286                 if (mp != NULL)
1287                         freemsg(mp);
1288         }
1289 }
1290 
1291 void *
1292 rfs3_read_getfh(READ3args *args)
1293 {
1294 
1295         return (&args->file);
1296 }
1297 
1298 #define MAX_IOVECS      12
1299 
1300 #ifdef DEBUG
1301 static int rfs3_write_hits = 0;
1302 static int rfs3_write_misses = 0;
1303 #endif
1304 
1305 void
1306 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1307     struct svc_req *req, cred_t *cr, bool_t ro)
1308 {
1309         nfs3_srv_t *ns;
1310         int error;
1311         vnode_t *vp;
1312         struct vattr *bvap = NULL;
1313         struct vattr bva;
1314         struct vattr *avap = NULL;
1315         struct vattr ava;
1316         u_offset_t rlimit;
1317         struct uio uio;
1318         struct iovec iov[MAX_IOVECS];
1319         mblk_t *m;
1320         struct iovec *iovp;
1321         int iovcnt;
1322         int ioflag;
1323         cred_t *savecred;
1324         int in_crit = 0;
1325         int rwlock_ret = -1;
1326         caller_context_t ct;
1327 
1328         vp = nfs3_fhtovp(&args->file, exi);
1329 
1330         DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1331             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1332             WRITE3args *, args);
1333 
1334         if (vp == NULL) {
1335                 error = ESTALE;
1336                 goto err;
1337         }
1338 
1339         ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
1340         ns = nfs3_get_srv();
1341 
1342         if (is_system_labeled()) {
1343                 bslabel_t *clabel = req->rq_label;
1344 
1345                 ASSERT(clabel != NULL);
1346                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1347                     "got client label from request(1)", struct svc_req *, req);
1348 
1349                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1350                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1351                             exi)) {
1352                                 resp->status = NFS3ERR_ACCES;
1353                                 goto err1;
1354                         }
1355                 }
1356         }
1357 
1358         ct.cc_sysid = 0;
1359         ct.cc_pid = 0;
1360         ct.cc_caller_id = nfs3_srv_caller_id;
1361         ct.cc_flags = CC_DONTBLOCK;
1362 
1363         /*
1364          * We have to enter the critical region before calling VOP_RWLOCK
1365          * to avoid a deadlock with ufs.
1366          */
1367         if (nbl_need_check(vp)) {
1368                 nbl_start_crit(vp, RW_READER);
1369                 in_crit = 1;
1370                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1371                     NULL)) {
1372                         error = EACCES;
1373                         goto err;
1374                 }
1375         }
1376 
1377         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1378 
1379         /* check if a monitor detected a delegation conflict */
1380         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1381                 resp->status = NFS3ERR_JUKEBOX;
1382                 rwlock_ret = -1;
1383                 goto err1;
1384         }
1385 
1386 
1387         bva.va_mask = AT_ALL;
1388         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1389 
1390         /*
1391          * If we can't get the attributes, then we can't do the
1392          * right access checking.  So, we'll fail the request.
1393          */
1394         if (error)
1395                 goto err;
1396 
1397         bvap = &bva;
1398         avap = bvap;
1399 
1400         if (args->count != args->data.data_len) {
1401                 resp->status = NFS3ERR_INVAL;
1402                 goto err1;
1403         }
1404 
1405         if (rdonly(ro, vp)) {
1406                 resp->status = NFS3ERR_ROFS;
1407                 goto err1;
1408         }
1409 
1410         if (vp->v_type != VREG) {
1411                 resp->status = NFS3ERR_INVAL;
1412                 goto err1;
1413         }
1414 
1415         if (crgetuid(cr) != bva.va_uid &&
1416             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1417                 goto err;
1418 
1419         if (MANDLOCK(vp, bva.va_mode)) {
1420                 resp->status = NFS3ERR_ACCES;
1421                 goto err1;
1422         }
1423 
1424         if (args->count == 0) {
1425                 resp->status = NFS3_OK;
1426                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1427                 resp->resok.count = 0;
1428                 resp->resok.committed = args->stable;
1429                 resp->resok.verf = ns->write3verf;
1430                 goto out;
1431         }
1432 
1433         if (args->mblk != NULL) {
1434                 iovcnt = 0;
1435                 for (m = args->mblk; m != NULL; m = m->b_cont)
1436                         iovcnt++;
1437                 if (iovcnt <= MAX_IOVECS) {
1438 #ifdef DEBUG
1439                         rfs3_write_hits++;
1440 #endif
1441                         iovp = iov;
1442                 } else {
1443 #ifdef DEBUG
1444                         rfs3_write_misses++;
1445 #endif
1446                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1447                 }
1448                 mblk_to_iov(args->mblk, iovcnt, iovp);
1449 
1450         } else if (args->rlist != NULL) {
1451                 iovcnt = 1;
1452                 iovp = iov;
1453                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1454                 iovp->iov_len = args->count;
1455         } else {
1456                 iovcnt = 1;
1457                 iovp = iov;
1458                 iovp->iov_base = args->data.data_val;
1459                 iovp->iov_len = args->count;
1460         }
1461 
1462         uio.uio_iov = iovp;
1463         uio.uio_iovcnt = iovcnt;
1464 
1465         uio.uio_segflg = UIO_SYSSPACE;
1466         uio.uio_extflg = UIO_COPY_DEFAULT;
1467         uio.uio_loffset = args->offset;
1468         uio.uio_resid = args->count;
1469         uio.uio_llimit = curproc->p_fsz_ctl;
1470         rlimit = uio.uio_llimit - args->offset;
1471         if (rlimit < (u_offset_t)uio.uio_resid)
1472                 uio.uio_resid = (int)rlimit;
1473 
1474         if (args->stable == UNSTABLE)
1475                 ioflag = 0;
1476         else if (args->stable == FILE_SYNC)
1477                 ioflag = FSYNC;
1478         else if (args->stable == DATA_SYNC)
1479                 ioflag = FDSYNC;
1480         else {
1481                 if (iovp != iov)
1482                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1483                 resp->status = NFS3ERR_INVAL;
1484                 goto err1;
1485         }
1486 
1487         /*
1488          * We're changing creds because VM may fault and we need
1489          * the cred of the current thread to be used if quota
1490          * checking is enabled.
1491          */
1492         savecred = curthread->t_cred;
1493         curthread->t_cred = cr;
1494         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1495         curthread->t_cred = savecred;
1496 
1497         if (iovp != iov)
1498                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1499 
1500         /* check if a monitor detected a delegation conflict */
1501         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1502                 resp->status = NFS3ERR_JUKEBOX;
1503                 goto err1;
1504         }
1505 
1506         ava.va_mask = AT_ALL;
1507         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1508 
1509         if (error)
1510                 goto err;
1511 
1512         /*
1513          * If we were unable to get the V_WRITELOCK_TRUE, then we
1514          * may not have accurate after attrs, so check if
1515          * we have both attributes, they have a non-zero va_seq, and
1516          * va_seq has changed by exactly one,
1517          * if not, turn off the before attr.
1518          */
1519         if (rwlock_ret != V_WRITELOCK_TRUE) {
1520                 if (bvap == NULL || avap == NULL ||
1521                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1522                     avap->va_seq != (bvap->va_seq + 1)) {
1523                         bvap = NULL;
1524                 }
1525         }
1526 
1527         resp->status = NFS3_OK;
1528         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1529         resp->resok.count = args->count - uio.uio_resid;
1530         resp->resok.committed = args->stable;
1531         resp->resok.verf = ns->write3verf;
1532         goto out;
1533 
1534 err:
1535         if (curthread->t_flag & T_WOULDBLOCK) {
1536                 curthread->t_flag &= ~T_WOULDBLOCK;
1537                 resp->status = NFS3ERR_JUKEBOX;
1538         } else
1539                 resp->status = puterrno3(error);
1540 err1:
1541         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1542 out:
1543         DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1544             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1545             WRITE3res *, resp);
1546 
1547         if (vp != NULL) {
1548                 if (rwlock_ret != -1)
1549                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1550                 if (in_crit)
1551                         nbl_end_crit(vp);
1552                 VN_RELE(vp);
1553         }
1554 }
1555 
1556 void *
1557 rfs3_write_getfh(WRITE3args *args)
1558 {
1559 
1560         return (&args->file);
1561 }
1562 
1563 void
1564 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1565     struct svc_req *req, cred_t *cr, bool_t ro)
1566 {
1567         int error;
1568         int in_crit = 0;
1569         vnode_t *vp;
1570         vnode_t *tvp = NULL;
1571         vnode_t *dvp;
1572         struct vattr *vap;
1573         struct vattr va;
1574         struct vattr *dbvap;
1575         struct vattr dbva;
1576         struct vattr *davap;
1577         struct vattr dava;
1578         enum vcexcl excl;
1579         nfstime3 *mtime;
1580         len_t reqsize;
1581         bool_t trunc;
1582         struct sockaddr *ca;
1583         char *name = NULL;
1584 
1585         dbvap = NULL;
1586         davap = NULL;
1587 
1588         dvp = nfs3_fhtovp(&args->where.dir, exi);
1589 
1590         DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1591             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1592             CREATE3args *, args);
1593 
1594         if (dvp == NULL) {
1595                 error = ESTALE;
1596                 goto out;
1597         }
1598 
1599         dbva.va_mask = AT_ALL;
1600         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1601         davap = dbvap;
1602 
1603         if (args->where.name == nfs3nametoolong) {
1604                 resp->status = NFS3ERR_NAMETOOLONG;
1605                 goto out1;
1606         }
1607 
1608         if (args->where.name == NULL || *(args->where.name) == '\0') {
1609                 resp->status = NFS3ERR_ACCES;
1610                 goto out1;
1611         }
1612 
1613         if (rdonly(ro, dvp)) {
1614                 resp->status = NFS3ERR_ROFS;
1615                 goto out1;
1616         }
1617 
1618         if (is_system_labeled()) {
1619                 bslabel_t *clabel = req->rq_label;
1620 
1621                 ASSERT(clabel != NULL);
1622                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1623                     "got client label from request(1)", struct svc_req *, req);
1624 
1625                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1626                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1627                             exi)) {
1628                                 resp->status = NFS3ERR_ACCES;
1629                                 goto out1;
1630                         }
1631                 }
1632         }
1633 
1634         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1635         name = nfscmd_convname(ca, exi, args->where.name,
1636             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1637 
1638         if (name == NULL) {
1639                 /* This is really a Solaris EILSEQ */
1640                 resp->status = NFS3ERR_INVAL;
1641                 goto out1;
1642         }
1643 
1644         if (args->how.mode == EXCLUSIVE) {
1645                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1646                 va.va_type = VREG;
1647                 va.va_mode = (mode_t)0;
1648                 /*
1649                  * Ensure no time overflows and that types match
1650                  */
1651                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1652                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1653                 va.va_mtime.tv_nsec = mtime->nseconds;
1654                 excl = EXCL;
1655         } else {
1656                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1657                     &va);
1658                 if (error)
1659                         goto out;
1660                 va.va_mask |= AT_TYPE;
1661                 va.va_type = VREG;
1662                 if (args->how.mode == GUARDED)
1663                         excl = EXCL;
1664                 else {
1665                         excl = NONEXCL;
1666 
1667                         /*
1668                          * During creation of file in non-exclusive mode
1669                          * if size of file is being set then make sure
1670                          * that if the file already exists that no conflicting
1671                          * non-blocking mandatory locks exists in the region
1672                          * being modified. If there are conflicting locks fail
1673                          * the operation with EACCES.
1674                          */
1675                         if (va.va_mask & AT_SIZE) {
1676                                 struct vattr tva;
1677 
1678                                 /*
1679                                  * Does file already exist?
1680                                  */
1681                                 error = VOP_LOOKUP(dvp, name, &tvp,
1682                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1683 
1684                                 /*
1685                                  * Check to see if the file has been delegated
1686                                  * to a v4 client.  If so, then begin recall of
1687                                  * the delegation and return JUKEBOX to allow
1688                                  * the client to retrasmit its request.
1689                                  */
1690 
1691                                 trunc = va.va_size == 0;
1692                                 if (!error &&
1693                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1694                                         resp->status = NFS3ERR_JUKEBOX;
1695                                         goto out1;
1696                                 }
1697 
1698                                 /*
1699                                  * Check for NBMAND lock conflicts
1700                                  */
1701                                 if (!error && nbl_need_check(tvp)) {
1702                                         u_offset_t offset;
1703                                         ssize_t len;
1704 
1705                                         nbl_start_crit(tvp, RW_READER);
1706                                         in_crit = 1;
1707 
1708                                         tva.va_mask = AT_SIZE;
1709                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1710                                             NULL);
1711                                         /*
1712                                          * Can't check for conflicts, so return
1713                                          * error.
1714                                          */
1715                                         if (error)
1716                                                 goto out;
1717 
1718                                         offset = tva.va_size < va.va_size ?
1719                                             tva.va_size : va.va_size;
1720                                         len = tva.va_size < va.va_size ?
1721                                             va.va_size - tva.va_size :
1722                                             tva.va_size - va.va_size;
1723                                         if (nbl_conflict(tvp, NBL_WRITE,
1724                                             offset, len, 0, NULL)) {
1725                                                 error = EACCES;
1726                                                 goto out;
1727                                         }
1728                                 } else if (tvp) {
1729                                         VN_RELE(tvp);
1730                                         tvp = NULL;
1731                                 }
1732                         }
1733                 }
1734                 if (va.va_mask & AT_SIZE)
1735                         reqsize = va.va_size;
1736         }
1737 
1738         /*
1739          * Must specify the mode.
1740          */
1741         if (!(va.va_mask & AT_MODE)) {
1742                 resp->status = NFS3ERR_INVAL;
1743                 goto out1;
1744         }
1745 
1746         /*
1747          * If the filesystem is exported with nosuid, then mask off
1748          * the setuid and setgid bits.
1749          */
1750         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1751                 va.va_mode &= ~(VSUID | VSGID);
1752 
1753 tryagain:
1754         /*
1755          * The file open mode used is VWRITE.  If the client needs
1756          * some other semantic, then it should do the access checking
1757          * itself.  It would have been nice to have the file open mode
1758          * passed as part of the arguments.
1759          */
1760         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1761             &vp, cr, 0, NULL, NULL);
1762 
1763         dava.va_mask = AT_ALL;
1764         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1765 
1766         if (error) {
1767                 /*
1768                  * If we got something other than file already exists
1769                  * then just return this error.  Otherwise, we got
1770                  * EEXIST.  If we were doing a GUARDED create, then
1771                  * just return this error.  Otherwise, we need to
1772                  * make sure that this wasn't a duplicate of an
1773                  * exclusive create request.
1774                  *
1775                  * The assumption is made that a non-exclusive create
1776                  * request will never return EEXIST.
1777                  */
1778                 if (error != EEXIST || args->how.mode == GUARDED)
1779                         goto out;
1780                 /*
1781                  * Lookup the file so that we can get a vnode for it.
1782                  */
1783                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1784                     NULL, cr, NULL, NULL, NULL);
1785                 if (error) {
1786                         /*
1787                          * We couldn't find the file that we thought that
1788                          * we just created.  So, we'll just try creating
1789                          * it again.
1790                          */
1791                         if (error == ENOENT)
1792                                 goto tryagain;
1793                         goto out;
1794                 }
1795 
1796                 /*
1797                  * If the file is delegated to a v4 client, go ahead
1798                  * and initiate recall, this create is a hint that a
1799                  * conflicting v3 open has occurred.
1800                  */
1801 
1802                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1803                         VN_RELE(vp);
1804                         resp->status = NFS3ERR_JUKEBOX;
1805                         goto out1;
1806                 }
1807 
1808                 va.va_mask = AT_ALL;
1809                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1810 
1811                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1812                 /* % with INT32_MAX to prevent overflows */
1813                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1814                     vap->va_mtime.tv_sec !=
1815                     (mtime->seconds % INT32_MAX) ||
1816                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1817                         VN_RELE(vp);
1818                         error = EEXIST;
1819                         goto out;
1820                 }
1821         } else {
1822 
1823                 if ((args->how.mode == UNCHECKED ||
1824                     args->how.mode == GUARDED) &&
1825                     args->how.createhow3_u.obj_attributes.size.set_it &&
1826                     va.va_size == 0)
1827                         trunc = TRUE;
1828                 else
1829                         trunc = FALSE;
1830 
1831                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1832                         VN_RELE(vp);
1833                         resp->status = NFS3ERR_JUKEBOX;
1834                         goto out1;
1835                 }
1836 
1837                 va.va_mask = AT_ALL;
1838                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1839 
1840                 /*
1841                  * We need to check to make sure that the file got
1842                  * created to the indicated size.  If not, we do a
1843                  * setattr to try to change the size, but we don't
1844                  * try too hard.  This shouldn't be a problem as most
1845                  * clients will only specify a size of zero which
1846                  * local file systems handle.  However, even if
1847                  * the client does specify a non-zero size, it can
1848                  * still recover by checking the size of the file
1849                  * after it has created it and then issue a setattr
1850                  * request of its own to set the size of the file.
1851                  */
1852                 if (vap != NULL &&
1853                     (args->how.mode == UNCHECKED ||
1854                     args->how.mode == GUARDED) &&
1855                     args->how.createhow3_u.obj_attributes.size.set_it &&
1856                     vap->va_size != reqsize) {
1857                         va.va_mask = AT_SIZE;
1858                         va.va_size = reqsize;
1859                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1860                         va.va_mask = AT_ALL;
1861                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1862                 }
1863         }
1864 
1865         if (name != args->where.name)
1866                 kmem_free(name, MAXPATHLEN + 1);
1867 
1868         error = makefh3(&resp->resok.obj.handle, vp, exi);
1869         if (error)
1870                 resp->resok.obj.handle_follows = FALSE;
1871         else
1872                 resp->resok.obj.handle_follows = TRUE;
1873 
1874         /*
1875          * Force modified data and metadata out to stable storage.
1876          */
1877         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1878         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1879 
1880         VN_RELE(vp);
1881         if (tvp != NULL) {
1882                 if (in_crit)
1883                         nbl_end_crit(tvp);
1884                 VN_RELE(tvp);
1885         }
1886 
1887         resp->status = NFS3_OK;
1888         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1889         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1890 
1891         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1892             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1893             CREATE3res *, resp);
1894 
1895         VN_RELE(dvp);
1896         return;
1897 
1898 out:
1899         if (curthread->t_flag & T_WOULDBLOCK) {
1900                 curthread->t_flag &= ~T_WOULDBLOCK;
1901                 resp->status = NFS3ERR_JUKEBOX;
1902         } else
1903                 resp->status = puterrno3(error);
1904 out1:
1905         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1906             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1907             CREATE3res *, resp);
1908 
1909         if (name != NULL && name != args->where.name)
1910                 kmem_free(name, MAXPATHLEN + 1);
1911 
1912         if (tvp != NULL) {
1913                 if (in_crit)
1914                         nbl_end_crit(tvp);
1915                 VN_RELE(tvp);
1916         }
1917         if (dvp != NULL)
1918                 VN_RELE(dvp);
1919         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1920 }
1921 
1922 void *
1923 rfs3_create_getfh(CREATE3args *args)
1924 {
1925 
1926         return (&args->where.dir);
1927 }
1928 
1929 void
1930 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1931     struct svc_req *req, cred_t *cr, bool_t ro)
1932 {
1933         int error;
1934         vnode_t *vp = NULL;
1935         vnode_t *dvp;
1936         struct vattr *vap;
1937         struct vattr va;
1938         struct vattr *dbvap;
1939         struct vattr dbva;
1940         struct vattr *davap;
1941         struct vattr dava;
1942         struct sockaddr *ca;
1943         char *name = NULL;
1944 
1945         dbvap = NULL;
1946         davap = NULL;
1947 
1948         dvp = nfs3_fhtovp(&args->where.dir, exi);
1949 
1950         DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1951             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1952             MKDIR3args *, args);
1953 
1954         if (dvp == NULL) {
1955                 error = ESTALE;
1956                 goto out;
1957         }
1958 
1959         dbva.va_mask = AT_ALL;
1960         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1961         davap = dbvap;
1962 
1963         if (args->where.name == nfs3nametoolong) {
1964                 resp->status = NFS3ERR_NAMETOOLONG;
1965                 goto out1;
1966         }
1967 
1968         if (args->where.name == NULL || *(args->where.name) == '\0') {
1969                 resp->status = NFS3ERR_ACCES;
1970                 goto out1;
1971         }
1972 
1973         if (rdonly(ro, dvp)) {
1974                 resp->status = NFS3ERR_ROFS;
1975                 goto out1;
1976         }
1977 
1978         if (is_system_labeled()) {
1979                 bslabel_t *clabel = req->rq_label;
1980 
1981                 ASSERT(clabel != NULL);
1982                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1983                     "got client label from request(1)", struct svc_req *, req);
1984 
1985                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1986                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1987                             exi)) {
1988                                 resp->status = NFS3ERR_ACCES;
1989                                 goto out1;
1990                         }
1991                 }
1992         }
1993 
1994         error = sattr3_to_vattr(&args->attributes, &va);
1995         if (error)
1996                 goto out;
1997 
1998         if (!(va.va_mask & AT_MODE)) {
1999                 resp->status = NFS3ERR_INVAL;
2000                 goto out1;
2001         }
2002 
2003         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2004         name = nfscmd_convname(ca, exi, args->where.name,
2005             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2006 
2007         if (name == NULL) {
2008                 resp->status = NFS3ERR_INVAL;
2009                 goto out1;
2010         }
2011 
2012         va.va_mask |= AT_TYPE;
2013         va.va_type = VDIR;
2014 
2015         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2016 
2017         if (name != args->where.name)
2018                 kmem_free(name, MAXPATHLEN + 1);
2019 
2020         dava.va_mask = AT_ALL;
2021         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2022 
2023         /*
2024          * Force modified data and metadata out to stable storage.
2025          */
2026         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2027 
2028         if (error)
2029                 goto out;
2030 
2031         error = makefh3(&resp->resok.obj.handle, vp, exi);
2032         if (error)
2033                 resp->resok.obj.handle_follows = FALSE;
2034         else
2035                 resp->resok.obj.handle_follows = TRUE;
2036 
2037         va.va_mask = AT_ALL;
2038         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2039 
2040         /*
2041          * Force modified data and metadata out to stable storage.
2042          */
2043         (void) VOP_FSYNC(vp, 0, cr, NULL);
2044 
2045         VN_RELE(vp);
2046 
2047         resp->status = NFS3_OK;
2048         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2049         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2050 
2051         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2052             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2053             MKDIR3res *, resp);
2054         VN_RELE(dvp);
2055 
2056         return;
2057 
2058 out:
2059         if (curthread->t_flag & T_WOULDBLOCK) {
2060                 curthread->t_flag &= ~T_WOULDBLOCK;
2061                 resp->status = NFS3ERR_JUKEBOX;
2062         } else
2063                 resp->status = puterrno3(error);
2064 out1:
2065         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2066             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2067             MKDIR3res *, resp);
2068         if (dvp != NULL)
2069                 VN_RELE(dvp);
2070         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2071 }
2072 
2073 void *
2074 rfs3_mkdir_getfh(MKDIR3args *args)
2075 {
2076 
2077         return (&args->where.dir);
2078 }
2079 
2080 void
2081 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2082     struct svc_req *req, cred_t *cr, bool_t ro)
2083 {
2084         int error;
2085         vnode_t *vp;
2086         vnode_t *dvp;
2087         struct vattr *vap;
2088         struct vattr va;
2089         struct vattr *dbvap;
2090         struct vattr dbva;
2091         struct vattr *davap;
2092         struct vattr dava;
2093         struct sockaddr *ca;
2094         char *name = NULL;
2095         char *symdata = NULL;
2096 
2097         dbvap = NULL;
2098         davap = NULL;
2099 
2100         dvp = nfs3_fhtovp(&args->where.dir, exi);
2101 
2102         DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2103             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2104             SYMLINK3args *, args);
2105 
2106         if (dvp == NULL) {
2107                 error = ESTALE;
2108                 goto err;
2109         }
2110 
2111         dbva.va_mask = AT_ALL;
2112         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2113         davap = dbvap;
2114 
2115         if (args->where.name == nfs3nametoolong) {
2116                 resp->status = NFS3ERR_NAMETOOLONG;
2117                 goto err1;
2118         }
2119 
2120         if (args->where.name == NULL || *(args->where.name) == '\0') {
2121                 resp->status = NFS3ERR_ACCES;
2122                 goto err1;
2123         }
2124 
2125         if (rdonly(ro, dvp)) {
2126                 resp->status = NFS3ERR_ROFS;
2127                 goto err1;
2128         }
2129 
2130         if (is_system_labeled()) {
2131                 bslabel_t *clabel = req->rq_label;
2132 
2133                 ASSERT(clabel != NULL);
2134                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2135                     "got client label from request(1)", struct svc_req *, req);
2136 
2137                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2138                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2139                             exi)) {
2140                                 resp->status = NFS3ERR_ACCES;
2141                                 goto err1;
2142                         }
2143                 }
2144         }
2145 
2146         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2147         if (error)
2148                 goto err;
2149 
2150         if (!(va.va_mask & AT_MODE)) {
2151                 resp->status = NFS3ERR_INVAL;
2152                 goto err1;
2153         }
2154 
2155         if (args->symlink.symlink_data == nfs3nametoolong) {
2156                 resp->status = NFS3ERR_NAMETOOLONG;
2157                 goto err1;
2158         }
2159 
2160         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2161         name = nfscmd_convname(ca, exi, args->where.name,
2162             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2163 
2164         if (name == NULL) {
2165                 /* This is really a Solaris EILSEQ */
2166                 resp->status = NFS3ERR_INVAL;
2167                 goto err1;
2168         }
2169 
2170         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2171             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2172         if (symdata == NULL) {
2173                 /* This is really a Solaris EILSEQ */
2174                 resp->status = NFS3ERR_INVAL;
2175                 goto err1;
2176         }
2177 
2178 
2179         va.va_mask |= AT_TYPE;
2180         va.va_type = VLNK;
2181 
2182         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2183 
2184         dava.va_mask = AT_ALL;
2185         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2186 
2187         if (error)
2188                 goto err;
2189 
2190         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2191             NULL, NULL, NULL);
2192 
2193         /*
2194          * Force modified data and metadata out to stable storage.
2195          */
2196         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2197 
2198 
2199         resp->status = NFS3_OK;
2200         if (error) {
2201                 resp->resok.obj.handle_follows = FALSE;
2202                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2203                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2204                 goto out;
2205         }
2206 
2207         error = makefh3(&resp->resok.obj.handle, vp, exi);
2208         if (error)
2209                 resp->resok.obj.handle_follows = FALSE;
2210         else
2211                 resp->resok.obj.handle_follows = TRUE;
2212 
2213         va.va_mask = AT_ALL;
2214         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2215 
2216         /*
2217          * Force modified data and metadata out to stable storage.
2218          */
2219         (void) VOP_FSYNC(vp, 0, cr, NULL);
2220 
2221         VN_RELE(vp);
2222 
2223         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2224         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2225         goto out;
2226 
2227 err:
2228         if (curthread->t_flag & T_WOULDBLOCK) {
2229                 curthread->t_flag &= ~T_WOULDBLOCK;
2230                 resp->status = NFS3ERR_JUKEBOX;
2231         } else
2232                 resp->status = puterrno3(error);
2233 err1:
2234         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2235 out:
2236         if (name != NULL && name != args->where.name)
2237                 kmem_free(name, MAXPATHLEN + 1);
2238         if (symdata != NULL && symdata != args->symlink.symlink_data)
2239                 kmem_free(symdata, MAXPATHLEN + 1);
2240 
2241         DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2242             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2243             SYMLINK3res *, resp);
2244 
2245         if (dvp != NULL)
2246                 VN_RELE(dvp);
2247 }
2248 
2249 void *
2250 rfs3_symlink_getfh(SYMLINK3args *args)
2251 {
2252 
2253         return (&args->where.dir);
2254 }
2255 
2256 void
2257 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2258     struct svc_req *req, cred_t *cr, bool_t ro)
2259 {
2260         int error;
2261         vnode_t *vp;
2262         vnode_t *realvp;
2263         vnode_t *dvp;
2264         struct vattr *vap;
2265         struct vattr va;
2266         struct vattr *dbvap;
2267         struct vattr dbva;
2268         struct vattr *davap;
2269         struct vattr dava;
2270         int mode;
2271         enum vcexcl excl;
2272         struct sockaddr *ca;
2273         char *name = NULL;
2274 
2275         dbvap = NULL;
2276         davap = NULL;
2277 
2278         dvp = nfs3_fhtovp(&args->where.dir, exi);
2279 
2280         DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2281             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2282             MKNOD3args *, args);
2283 
2284         if (dvp == NULL) {
2285                 error = ESTALE;
2286                 goto out;
2287         }
2288 
2289         dbva.va_mask = AT_ALL;
2290         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2291         davap = dbvap;
2292 
2293         if (args->where.name == nfs3nametoolong) {
2294                 resp->status = NFS3ERR_NAMETOOLONG;
2295                 goto out1;
2296         }
2297 
2298         if (args->where.name == NULL || *(args->where.name) == '\0') {
2299                 resp->status = NFS3ERR_ACCES;
2300                 goto out1;
2301         }
2302 
2303         if (rdonly(ro, dvp)) {
2304                 resp->status = NFS3ERR_ROFS;
2305                 goto out1;
2306         }
2307 
2308         if (is_system_labeled()) {
2309                 bslabel_t *clabel = req->rq_label;
2310 
2311                 ASSERT(clabel != NULL);
2312                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2313                     "got client label from request(1)", struct svc_req *, req);
2314 
2315                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2316                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2317                             exi)) {
2318                                 resp->status = NFS3ERR_ACCES;
2319                                 goto out1;
2320                         }
2321                 }
2322         }
2323 
2324         switch (args->what.type) {
2325         case NF3CHR:
2326         case NF3BLK:
2327                 error = sattr3_to_vattr(
2328                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2329                 if (error)
2330                         goto out;
2331                 if (secpolicy_sys_devices(cr) != 0) {
2332                         resp->status = NFS3ERR_PERM;
2333                         goto out1;
2334                 }
2335                 if (args->what.type == NF3CHR)
2336                         va.va_type = VCHR;
2337                 else
2338                         va.va_type = VBLK;
2339                 va.va_rdev = makedevice(
2340                     args->what.mknoddata3_u.device.spec.specdata1,
2341                     args->what.mknoddata3_u.device.spec.specdata2);
2342                 va.va_mask |= AT_TYPE | AT_RDEV;
2343                 break;
2344         case NF3SOCK:
2345                 error = sattr3_to_vattr(
2346                     &args->what.mknoddata3_u.pipe_attributes, &va);
2347                 if (error)
2348                         goto out;
2349                 va.va_type = VSOCK;
2350                 va.va_mask |= AT_TYPE;
2351                 break;
2352         case NF3FIFO:
2353                 error = sattr3_to_vattr(
2354                     &args->what.mknoddata3_u.pipe_attributes, &va);
2355                 if (error)
2356                         goto out;
2357                 va.va_type = VFIFO;
2358                 va.va_mask |= AT_TYPE;
2359                 break;
2360         default:
2361                 resp->status = NFS3ERR_BADTYPE;
2362                 goto out1;
2363         }
2364 
2365         /*
2366          * Must specify the mode.
2367          */
2368         if (!(va.va_mask & AT_MODE)) {
2369                 resp->status = NFS3ERR_INVAL;
2370                 goto out1;
2371         }
2372 
2373         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2374         name = nfscmd_convname(ca, exi, args->where.name,
2375             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2376 
2377         if (name == NULL) {
2378                 resp->status = NFS3ERR_INVAL;
2379                 goto out1;
2380         }
2381 
2382         excl = EXCL;
2383 
2384         mode = 0;
2385 
2386         error = VOP_CREATE(dvp, name, &va, excl, mode,
2387             &vp, cr, 0, NULL, NULL);
2388 
2389         if (name != args->where.name)
2390                 kmem_free(name, MAXPATHLEN + 1);
2391 
2392         dava.va_mask = AT_ALL;
2393         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2394 
2395         /*
2396          * Force modified data and metadata out to stable storage.
2397          */
2398         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2399 
2400         if (error)
2401                 goto out;
2402 
2403         resp->status = NFS3_OK;
2404 
2405         error = makefh3(&resp->resok.obj.handle, vp, exi);
2406         if (error)
2407                 resp->resok.obj.handle_follows = FALSE;
2408         else
2409                 resp->resok.obj.handle_follows = TRUE;
2410 
2411         va.va_mask = AT_ALL;
2412         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2413 
2414         /*
2415          * Force modified metadata out to stable storage.
2416          *
2417          * if a underlying vp exists, pass it to VOP_FSYNC
2418          */
2419         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2420                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2421         else
2422                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2423 
2424         VN_RELE(vp);
2425 
2426         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2427         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2428         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2429             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2430             MKNOD3res *, resp);
2431         VN_RELE(dvp);
2432         return;
2433 
2434 out:
2435         if (curthread->t_flag & T_WOULDBLOCK) {
2436                 curthread->t_flag &= ~T_WOULDBLOCK;
2437                 resp->status = NFS3ERR_JUKEBOX;
2438         } else
2439                 resp->status = puterrno3(error);
2440 out1:
2441         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2442             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2443             MKNOD3res *, resp);
2444         if (dvp != NULL)
2445                 VN_RELE(dvp);
2446         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2447 }
2448 
2449 void *
2450 rfs3_mknod_getfh(MKNOD3args *args)
2451 {
2452 
2453         return (&args->where.dir);
2454 }
2455 
2456 void
2457 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2458     struct svc_req *req, cred_t *cr, bool_t ro)
2459 {
2460         int error = 0;
2461         vnode_t *vp;
2462         struct vattr *bvap;
2463         struct vattr bva;
2464         struct vattr *avap;
2465         struct vattr ava;
2466         vnode_t *targvp = NULL;
2467         struct sockaddr *ca;
2468         char *name = NULL;
2469 
2470         bvap = NULL;
2471         avap = NULL;
2472 
2473         vp = nfs3_fhtovp(&args->object.dir, exi);
2474 
2475         DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2476             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2477             REMOVE3args *, args);
2478 
2479         if (vp == NULL) {
2480                 error = ESTALE;
2481                 goto err;
2482         }
2483 
2484         bva.va_mask = AT_ALL;
2485         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2486         avap = bvap;
2487 
2488         if (vp->v_type != VDIR) {
2489                 resp->status = NFS3ERR_NOTDIR;
2490                 goto err1;
2491         }
2492 
2493         if (args->object.name == nfs3nametoolong) {
2494                 resp->status = NFS3ERR_NAMETOOLONG;
2495                 goto err1;
2496         }
2497 
2498         if (args->object.name == NULL || *(args->object.name) == '\0') {
2499                 resp->status = NFS3ERR_ACCES;
2500                 goto err1;
2501         }
2502 
2503         if (rdonly(ro, vp)) {
2504                 resp->status = NFS3ERR_ROFS;
2505                 goto err1;
2506         }
2507 
2508         if (is_system_labeled()) {
2509                 bslabel_t *clabel = req->rq_label;
2510 
2511                 ASSERT(clabel != NULL);
2512                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2513                     "got client label from request(1)", struct svc_req *, req);
2514 
2515                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2516                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2517                             exi)) {
2518                                 resp->status = NFS3ERR_ACCES;
2519                                 goto err1;
2520                         }
2521                 }
2522         }
2523 
2524         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2525         name = nfscmd_convname(ca, exi, args->object.name,
2526             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2527 
2528         if (name == NULL) {
2529                 resp->status = NFS3ERR_INVAL;
2530                 goto err1;
2531         }
2532 
2533         /*
2534          * Check for a conflict with a non-blocking mandatory share
2535          * reservation and V4 delegations
2536          */
2537         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2538             NULL, cr, NULL, NULL, NULL);
2539         if (error != 0)
2540                 goto err;
2541 
2542         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2543                 resp->status = NFS3ERR_JUKEBOX;
2544                 goto err1;
2545         }
2546 
2547         if (!nbl_need_check(targvp)) {
2548                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2549         } else {
2550                 nbl_start_crit(targvp, RW_READER);
2551                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2552                         error = EACCES;
2553                 } else {
2554                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2555                 }
2556                 nbl_end_crit(targvp);
2557         }
2558         VN_RELE(targvp);
2559         targvp = NULL;
2560 
2561         ava.va_mask = AT_ALL;
2562         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2563 
2564         /*
2565          * Force modified data and metadata out to stable storage.
2566          */
2567         (void) VOP_FSYNC(vp, 0, cr, NULL);
2568 
2569         if (error)
2570                 goto err;
2571 
2572         resp->status = NFS3_OK;
2573         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2574         goto out;
2575 
2576 err:
2577         if (curthread->t_flag & T_WOULDBLOCK) {
2578                 curthread->t_flag &= ~T_WOULDBLOCK;
2579                 resp->status = NFS3ERR_JUKEBOX;
2580         } else
2581                 resp->status = puterrno3(error);
2582 err1:
2583         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2584 out:
2585         DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2586             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2587             REMOVE3res *, resp);
2588 
2589         if (name != NULL && name != args->object.name)
2590                 kmem_free(name, MAXPATHLEN + 1);
2591 
2592         if (vp != NULL)
2593                 VN_RELE(vp);
2594 }
2595 
2596 void *
2597 rfs3_remove_getfh(REMOVE3args *args)
2598 {
2599 
2600         return (&args->object.dir);
2601 }
2602 
2603 void
2604 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2605     struct svc_req *req, cred_t *cr, bool_t ro)
2606 {
2607         int error;
2608         vnode_t *vp;
2609         struct vattr *bvap;
2610         struct vattr bva;
2611         struct vattr *avap;
2612         struct vattr ava;
2613         struct sockaddr *ca;
2614         char *name = NULL;
2615 
2616         bvap = NULL;
2617         avap = NULL;
2618 
2619         vp = nfs3_fhtovp(&args->object.dir, exi);
2620 
2621         DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2622             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2623             RMDIR3args *, args);
2624 
2625         if (vp == NULL) {
2626                 error = ESTALE;
2627                 goto err;
2628         }
2629 
2630         bva.va_mask = AT_ALL;
2631         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2632         avap = bvap;
2633 
2634         if (vp->v_type != VDIR) {
2635                 resp->status = NFS3ERR_NOTDIR;
2636                 goto err1;
2637         }
2638 
2639         if (args->object.name == nfs3nametoolong) {
2640                 resp->status = NFS3ERR_NAMETOOLONG;
2641                 goto err1;
2642         }
2643 
2644         if (args->object.name == NULL || *(args->object.name) == '\0') {
2645                 resp->status = NFS3ERR_ACCES;
2646                 goto err1;
2647         }
2648 
2649         if (rdonly(ro, vp)) {
2650                 resp->status = NFS3ERR_ROFS;
2651                 goto err1;
2652         }
2653 
2654         if (is_system_labeled()) {
2655                 bslabel_t *clabel = req->rq_label;
2656 
2657                 ASSERT(clabel != NULL);
2658                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2659                     "got client label from request(1)", struct svc_req *, req);
2660 
2661                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2662                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2663                             exi)) {
2664                                 resp->status = NFS3ERR_ACCES;
2665                                 goto err1;
2666                         }
2667                 }
2668         }
2669 
2670         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2671         name = nfscmd_convname(ca, exi, args->object.name,
2672             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2673 
2674         if (name == NULL) {
2675                 resp->status = NFS3ERR_INVAL;
2676                 goto err1;
2677         }
2678 
2679         ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
2680         error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2681 
2682         if (name != args->object.name)
2683                 kmem_free(name, MAXPATHLEN + 1);
2684 
2685         ava.va_mask = AT_ALL;
2686         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2687 
2688         /*
2689          * Force modified data and metadata out to stable storage.
2690          */
2691         (void) VOP_FSYNC(vp, 0, cr, NULL);
2692 
2693         if (error) {
2694                 /*
2695                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2696                  * if the directory is not empty.  A System V NFS server
2697                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2698                  * over the wire.
2699                  */
2700                 if (error == EEXIST)
2701                         error = ENOTEMPTY;
2702                 goto err;
2703         }
2704 
2705         resp->status = NFS3_OK;
2706         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2707         goto out;
2708 
2709 err:
2710         if (curthread->t_flag & T_WOULDBLOCK) {
2711                 curthread->t_flag &= ~T_WOULDBLOCK;
2712                 resp->status = NFS3ERR_JUKEBOX;
2713         } else
2714                 resp->status = puterrno3(error);
2715 err1:
2716         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2717 out:
2718         DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2719             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2720             RMDIR3res *, resp);
2721         if (vp != NULL)
2722                 VN_RELE(vp);
2723 
2724 }
2725 
2726 void *
2727 rfs3_rmdir_getfh(RMDIR3args *args)
2728 {
2729 
2730         return (&args->object.dir);
2731 }
2732 
2733 void
2734 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2735     struct svc_req *req, cred_t *cr, bool_t ro)
2736 {
2737         int error = 0;
2738         vnode_t *fvp;
2739         vnode_t *tvp;
2740         vnode_t *targvp;
2741         struct vattr *fbvap;
2742         struct vattr fbva;
2743         struct vattr *favap;
2744         struct vattr fava;
2745         struct vattr *tbvap;
2746         struct vattr tbva;
2747         struct vattr *tavap;
2748         struct vattr tava;
2749         nfs_fh3 *fh3;
2750         struct exportinfo *to_exi;
2751         vnode_t *srcvp = NULL;
2752         bslabel_t *clabel;
2753         struct sockaddr *ca;
2754         char *name = NULL;
2755         char *toname = NULL;
2756 
2757         fbvap = NULL;
2758         favap = NULL;
2759         tbvap = NULL;
2760         tavap = NULL;
2761         tvp = NULL;
2762 
2763         fvp = nfs3_fhtovp(&args->from.dir, exi);
2764 
2765         DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2766             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2767             RENAME3args *, args);
2768 
2769         if (fvp == NULL) {
2770                 error = ESTALE;
2771                 goto err;
2772         }
2773 
2774         if (is_system_labeled()) {
2775                 clabel = req->rq_label;
2776                 ASSERT(clabel != NULL);
2777                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2778                     "got client label from request(1)", struct svc_req *, req);
2779 
2780                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2781                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2782                             exi)) {
2783                                 resp->status = NFS3ERR_ACCES;
2784                                 goto err1;
2785                         }
2786                 }
2787         }
2788 
2789         fbva.va_mask = AT_ALL;
2790         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2791         favap = fbvap;
2792 
2793         fh3 = &args->to.dir;
2794         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2795         if (to_exi == NULL) {
2796                 resp->status = NFS3ERR_ACCES;
2797                 goto err1;
2798         }
2799         exi_rele(to_exi);
2800 
2801         if (to_exi != exi) {
2802                 resp->status = NFS3ERR_XDEV;
2803                 goto err1;
2804         }
2805 
2806         tvp = nfs3_fhtovp(&args->to.dir, exi);
2807         if (tvp == NULL) {
2808                 error = ESTALE;
2809                 goto err;
2810         }
2811 
2812         tbva.va_mask = AT_ALL;
2813         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2814         tavap = tbvap;
2815 
2816         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2817                 resp->status = NFS3ERR_NOTDIR;
2818                 goto err1;
2819         }
2820 
2821         if (args->from.name == nfs3nametoolong ||
2822             args->to.name == nfs3nametoolong) {
2823                 resp->status = NFS3ERR_NAMETOOLONG;
2824                 goto err1;
2825         }
2826         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2827             args->to.name == NULL || *(args->to.name) == '\0') {
2828                 resp->status = NFS3ERR_ACCES;
2829                 goto err1;
2830         }
2831 
2832         if (rdonly(ro, tvp)) {
2833                 resp->status = NFS3ERR_ROFS;
2834                 goto err1;
2835         }
2836 
2837         if (is_system_labeled()) {
2838                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2839                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2840                             exi)) {
2841                                 resp->status = NFS3ERR_ACCES;
2842                                 goto err1;
2843                         }
2844                 }
2845         }
2846 
2847         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2848         name = nfscmd_convname(ca, exi, args->from.name,
2849             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2850 
2851         if (name == NULL) {
2852                 resp->status = NFS3ERR_INVAL;
2853                 goto err1;
2854         }
2855 
2856         toname = nfscmd_convname(ca, exi, args->to.name,
2857             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2858 
2859         if (toname == NULL) {
2860                 resp->status = NFS3ERR_INVAL;
2861                 goto err1;
2862         }
2863 
2864         /*
2865          * Check for a conflict with a non-blocking mandatory share
2866          * reservation or V4 delegations.
2867          */
2868         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2869             NULL, cr, NULL, NULL, NULL);
2870         if (error != 0)
2871                 goto err;
2872 
2873         /*
2874          * If we rename a delegated file we should recall the
2875          * delegation, since future opens should fail or would
2876          * refer to a new file.
2877          */
2878         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2879                 resp->status = NFS3ERR_JUKEBOX;
2880                 goto err1;
2881         }
2882 
2883         /*
2884          * Check for renaming over a delegated file.  Check nfs4_deleg_policy
2885          * first to avoid VOP_LOOKUP if possible.
2886          */
2887         if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2888             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2889             NULL, NULL, NULL) == 0) {
2890 
2891                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2892                         VN_RELE(targvp);
2893                         resp->status = NFS3ERR_JUKEBOX;
2894                         goto err1;
2895                 }
2896                 VN_RELE(targvp);
2897         }
2898 
2899         if (!nbl_need_check(srcvp)) {
2900                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2901         } else {
2902                 nbl_start_crit(srcvp, RW_READER);
2903                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2904                         error = EACCES;
2905                 else
2906                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2907                 nbl_end_crit(srcvp);
2908         }
2909         if (error == 0)
2910                 vn_renamepath(tvp, srcvp, args->to.name,
2911                     strlen(args->to.name));
2912         VN_RELE(srcvp);
2913         srcvp = NULL;
2914 
2915         fava.va_mask = AT_ALL;
2916         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2917         tava.va_mask = AT_ALL;
2918         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2919 
2920         /*
2921          * Force modified data and metadata out to stable storage.
2922          */
2923         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2924         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2925 
2926         if (error)
2927                 goto err;
2928 
2929         resp->status = NFS3_OK;
2930         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2931         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2932         goto out;
2933 
2934 err:
2935         if (curthread->t_flag & T_WOULDBLOCK) {
2936                 curthread->t_flag &= ~T_WOULDBLOCK;
2937                 resp->status = NFS3ERR_JUKEBOX;
2938         } else {
2939                 resp->status = puterrno3(error);
2940         }
2941 err1:
2942         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2943         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2944 
2945 out:
2946         if (name != NULL && name != args->from.name)
2947                 kmem_free(name, MAXPATHLEN + 1);
2948         if (toname != NULL && toname != args->to.name)
2949                 kmem_free(toname, MAXPATHLEN + 1);
2950 
2951         DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2952             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2953             RENAME3res *, resp);
2954         if (fvp != NULL)
2955                 VN_RELE(fvp);
2956         if (tvp != NULL)
2957                 VN_RELE(tvp);
2958 }
2959 
2960 void *
2961 rfs3_rename_getfh(RENAME3args *args)
2962 {
2963 
2964         return (&args->from.dir);
2965 }
2966 
2967 void
2968 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2969     struct svc_req *req, cred_t *cr, bool_t ro)
2970 {
2971         int error;
2972         vnode_t *vp;
2973         vnode_t *dvp;
2974         struct vattr *vap;
2975         struct vattr va;
2976         struct vattr *bvap;
2977         struct vattr bva;
2978         struct vattr *avap;
2979         struct vattr ava;
2980         nfs_fh3 *fh3;
2981         struct exportinfo *to_exi;
2982         bslabel_t *clabel;
2983         struct sockaddr *ca;
2984         char *name = NULL;
2985 
2986         vap = NULL;
2987         bvap = NULL;
2988         avap = NULL;
2989         dvp = NULL;
2990 
2991         vp = nfs3_fhtovp(&args->file, exi);
2992 
2993         DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2994             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2995             LINK3args *, args);
2996 
2997         if (vp == NULL) {
2998                 error = ESTALE;
2999                 goto out;
3000         }
3001 
3002         va.va_mask = AT_ALL;
3003         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3004 
3005         fh3 = &args->link.dir;
3006         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3007         if (to_exi == NULL) {
3008                 resp->status = NFS3ERR_ACCES;
3009                 goto out1;
3010         }
3011         exi_rele(to_exi);
3012 
3013         if (to_exi != exi) {
3014                 resp->status = NFS3ERR_XDEV;
3015                 goto out1;
3016         }
3017 
3018         if (is_system_labeled()) {
3019                 clabel = req->rq_label;
3020 
3021                 ASSERT(clabel != NULL);
3022                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3023                     "got client label from request(1)", struct svc_req *, req);
3024 
3025                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3026                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3027                             exi)) {
3028                                 resp->status = NFS3ERR_ACCES;
3029                                 goto out1;
3030                         }
3031                 }
3032         }
3033 
3034         dvp = nfs3_fhtovp(&args->link.dir, exi);
3035         if (dvp == NULL) {
3036                 error = ESTALE;
3037                 goto out;
3038         }
3039 
3040         bva.va_mask = AT_ALL;
3041         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3042 
3043         if (dvp->v_type != VDIR) {
3044                 resp->status = NFS3ERR_NOTDIR;
3045                 goto out1;
3046         }
3047 
3048         if (args->link.name == nfs3nametoolong) {
3049                 resp->status = NFS3ERR_NAMETOOLONG;
3050                 goto out1;
3051         }
3052 
3053         if (args->link.name == NULL || *(args->link.name) == '\0') {
3054                 resp->status = NFS3ERR_ACCES;
3055                 goto out1;
3056         }
3057 
3058         if (rdonly(ro, dvp)) {
3059                 resp->status = NFS3ERR_ROFS;
3060                 goto out1;
3061         }
3062 
3063         if (is_system_labeled()) {
3064                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3065                     "got client label from request(1)", struct svc_req *, req);
3066 
3067                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3068                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3069                             exi)) {
3070                                 resp->status = NFS3ERR_ACCES;
3071                                 goto out1;
3072                         }
3073                 }
3074         }
3075 
3076         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3077         name = nfscmd_convname(ca, exi, args->link.name,
3078             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3079 
3080         if (name == NULL) {
3081                 resp->status = NFS3ERR_SERVERFAULT;
3082                 goto out1;
3083         }
3084 
3085         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3086 
3087         va.va_mask = AT_ALL;
3088         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3089         ava.va_mask = AT_ALL;
3090         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3091 
3092         /*
3093          * Force modified data and metadata out to stable storage.
3094          */
3095         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3096         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3097 
3098         if (error)
3099                 goto out;
3100 
3101         VN_RELE(dvp);
3102 
3103         resp->status = NFS3_OK;
3104         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3105         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3106 
3107         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3108             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3109             LINK3res *, resp);
3110 
3111         VN_RELE(vp);
3112 
3113         return;
3114 
3115 out:
3116         if (curthread->t_flag & T_WOULDBLOCK) {
3117                 curthread->t_flag &= ~T_WOULDBLOCK;
3118                 resp->status = NFS3ERR_JUKEBOX;
3119         } else
3120                 resp->status = puterrno3(error);
3121 out1:
3122         if (name != NULL && name != args->link.name)
3123                 kmem_free(name, MAXPATHLEN + 1);
3124 
3125         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3126             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3127             LINK3res *, resp);
3128 
3129         if (vp != NULL)
3130                 VN_RELE(vp);
3131         if (dvp != NULL)
3132                 VN_RELE(dvp);
3133         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3134         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3135 }
3136 
3137 void *
3138 rfs3_link_getfh(LINK3args *args)
3139 {
3140 
3141         return (&args->file);
3142 }
3143 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size in bytes of a READDIR
 * response that carries the directory attributes and exactly one
 * directory entry whose name is `length' bytes long.  If the count in
 * the incoming request is larger than this, then we are guaranteed to
 * be able to return at least one directory entry if one exists, so
 * there is no need to check for NFS3ERR_TOOSMALL.  If it is not larger
 * than this, then we need to check whether that error must be returned.
 *
 * The fixed part of the response, in XDR units, is made up of:
 *
 *      1                       status
 *      1                       attribute flag
 *      2                       cookie verifier
 *      NFS3_SIZEOF_FATTR3      attributes
 *      1                       boolean (entry follows)
 *      2                       file id
 *      1                       directory name length
 *      2                       cookie
 *      1                       end of list
 *      1                       end of file
 *
 * Each XDR unit is BYTES_PER_XDR_UNIT bytes.  The name itself is added
 * on top, rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (BYTES_PER_XDR_UNIT * \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) + \
            roundup((length), BYTES_PER_XDR_UNIT))
3172 
3173 /* ARGSUSED */
3174 void
3175 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3176     struct svc_req *req, cred_t *cr, bool_t ro)
3177 {
3178         int error;
3179         vnode_t *vp;
3180         struct vattr *vap;
3181         struct vattr va;
3182         struct iovec iov;
3183         struct uio uio;
3184         char *data;
3185         int iseof;
3186         int bufsize;
3187         int namlen;
3188         uint_t count;
3189         struct sockaddr *ca;
3190 
3191         vap = NULL;
3192 
3193         vp = nfs3_fhtovp(&args->dir, exi);
3194 
3195         DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3196             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3197             READDIR3args *, args);
3198 
3199         if (vp == NULL) {
3200                 error = ESTALE;
3201                 goto out;
3202         }
3203 
3204         if (is_system_labeled()) {
3205                 bslabel_t *clabel = req->rq_label;
3206 
3207                 ASSERT(clabel != NULL);
3208                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3209                     "got client label from request(1)", struct svc_req *, req);
3210 
3211                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3212                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3213                             exi)) {
3214                                 resp->status = NFS3ERR_ACCES;
3215                                 goto out1;
3216                         }
3217                 }
3218         }
3219 
3220         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3221 
3222         va.va_mask = AT_ALL;
3223         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3224 
3225         if (vp->v_type != VDIR) {
3226                 resp->status = NFS3ERR_NOTDIR;
3227                 goto out1;
3228         }
3229 
3230         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3231         if (error)
3232                 goto out;
3233 
3234         /*
3235          * Now don't allow arbitrary count to alloc;
3236          * allow the maximum not to exceed rfs3_tsize()
3237          */
3238         if (args->count > rfs3_tsize(req))
3239                 args->count = rfs3_tsize(req);
3240 
3241         /*
3242          * Make sure that there is room to read at least one entry
3243          * if any are available.
3244          */
3245         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3246                 count = DIRENT64_RECLEN(MAXNAMELEN);
3247         else
3248                 count = args->count;
3249 
3250         data = kmem_alloc(count, KM_SLEEP);
3251 
3252         iov.iov_base = data;
3253         iov.iov_len = count;
3254         uio.uio_iov = &iov;
3255         uio.uio_iovcnt = 1;
3256         uio.uio_segflg = UIO_SYSSPACE;
3257         uio.uio_extflg = UIO_COPY_CACHED;
3258         uio.uio_loffset = (offset_t)args->cookie;
3259         uio.uio_resid = count;
3260 
3261         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3262 
3263         va.va_mask = AT_ALL;
3264         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3265 
3266         if (error) {
3267                 kmem_free(data, count);
3268                 goto out;
3269         }
3270 
3271         /*
3272          * If the count was not large enough to be able to guarantee
3273          * to be able to return at least one entry, then need to
3274          * check to see if NFS3ERR_TOOSMALL should be returned.
3275          */
3276         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3277                 /*
3278                  * bufsize is used to keep track of the size of the response.
3279                  * It is primed with:
3280                  *      1 for the status +
3281                  *      1 for the dir_attributes.attributes boolean +
3282                  *      2 for the cookie verifier
3283                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3284                  * to bytes.  If there are directory attributes to be
3285                  * returned, then:
3286                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3287                  * time BYTES_PER_XDR_UNIT is added to account for them.
3288                  */
3289                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3290                 if (vap != NULL)
3291                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3292                 /*
3293                  * An entry is composed of:
3294                  *      1 for the true/false list indicator +
3295                  *      2 for the fileid +
3296                  *      1 for the length of the name +
3297                  *      2 for the cookie +
3298                  * all times BYTES_PER_XDR_UNIT to convert from
3299                  * XDR units to bytes, plus the length of the name
3300                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3301                  */
3302                 if (count != uio.uio_resid) {
3303                         namlen = strlen(((struct dirent64 *)data)->d_name);
3304                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3305                             roundup(namlen, BYTES_PER_XDR_UNIT);
3306                 }
3307                 /*
3308                  * We need to check to see if the number of bytes left
3309                  * to go into the buffer will actually fit into the
3310                  * buffer.  This is calculated as the size of this
3311                  * entry plus:
3312                  *      1 for the true/false list indicator +
3313                  *      1 for the eof indicator
3314                  * times BYTES_PER_XDR_UNIT to convert from from
3315                  * XDR units to bytes.
3316                  */
3317                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3318                 if (bufsize > args->count) {
3319                         kmem_free(data, count);
3320                         resp->status = NFS3ERR_TOOSMALL;
3321                         goto out1;
3322                 }
3323         }
3324 
3325         /*
3326          * Have a valid readir buffer for the native character
3327          * set. Need to check if a conversion is necessary and
3328          * potentially rewrite the whole buffer. Note that if the
3329          * conversion expands names enough, the structure may not
3330          * fit. In this case, we need to drop entries until if fits
3331          * and patch the counts in order that the next readdir will
3332          * get the correct entries.
3333          */
3334         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3335         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3336 
3337 
3338         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3339 
3340 #if 0 /* notyet */
3341         /*
3342          * Don't do this.  It causes local disk writes when just
3343          * reading the file and the overhead is deemed larger
3344          * than the benefit.
3345          */
3346         /*
3347          * Force modified metadata out to stable storage.
3348          */
3349         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3350 #endif
3351 
3352         resp->status = NFS3_OK;
3353         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3354         resp->resok.cookieverf = 0;
3355         resp->resok.reply.entries = (entry3 *)data;
3356         resp->resok.reply.eof = iseof;
3357         resp->resok.size = count - uio.uio_resid;
3358         resp->resok.count = args->count;
3359         resp->resok.freecount = count;
3360 
3361         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3362             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3363             READDIR3res *, resp);
3364 
3365         VN_RELE(vp);
3366 
3367         return;
3368 
3369 out:
3370         if (curthread->t_flag & T_WOULDBLOCK) {
3371                 curthread->t_flag &= ~T_WOULDBLOCK;
3372                 resp->status = NFS3ERR_JUKEBOX;
3373         } else
3374                 resp->status = puterrno3(error);
3375 out1:
3376         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3377 
3378         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3379             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3380             READDIR3res *, resp);
3381 
3382         if (vp != NULL) {
3383                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3384                 VN_RELE(vp);
3385         }
3386         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3387 }
3388 
3389 void *
3390 rfs3_readdir_getfh(READDIR3args *args)
3391 {
3392 
3393         return (&args->dir);
3394 }
3395 
3396 void
3397 rfs3_readdir_free(READDIR3res *resp)
3398 {
3399 
3400         if (resp->status == NFS3_OK)
3401                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3402 }
3403 
/*
 * nextdp(dp) yields a pointer to the dirent64 record that starts
 * dp->d_reclen bytes after dp.  Any earlier definition of nextdp is
 * discarded first (#undef of an undefined name is harmless).
 */
#undef nextdp
#define nextdp(dp)      \
        ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3408 
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) is the size in bytes of a single
 * READDIRPLUS directory entry, including its attributes and its file
 * handle, for a name that is `namelen' bytes long.  If the space left
 * in the reply is larger than this, then we are guaranteed to be able
 * to return at least one more directory entry if one exists.
 *
 * The fixed part of the entry, in XDR units, is made up of:
 *
 *      1                       boolean (entry follows)
 *      2                       file id
 *      1                       directory name length
 *      2                       cookie
 *      1                       attribute flag
 *      NFS3_SIZEOF_FATTR3      attributes
 *      1                       status for the file handle
 *      1                       length of the file handle
 *
 * Each XDR unit is BYTES_PER_XDR_UNIT bytes.  Added on top are the
 * maximum size of a file handle (NFS3_MAXFHSIZE bytes) and the name
 * itself, rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        (BYTES_PER_XDR_UNIT * \
            (1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) + \
            NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3432 
/*
 * Initial value of rd_unit in rfs3_readdirplus(); defaults to MAXBSIZE.
 * NOTE(review): presumably the size of each directory read issued by
 * READDIRPLUS -- confirm against the remainder of rfs3_readdirplus().
 */
static int rfs3_readdir_unit = MAXBSIZE;
3434 
3435 /* ARGSUSED */
3436 void
3437 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3438     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3439 {
3440         int error;
3441         vnode_t *vp;
3442         struct vattr *vap;
3443         struct vattr va;
3444         struct iovec iov;
3445         struct uio uio;
3446         char *data;
3447         int iseof;
3448         struct dirent64 *dp;
3449         vnode_t *nvp;
3450         struct vattr *nvap;
3451         struct vattr nva;
3452         entryplus3_info *infop = NULL;
3453         int size = 0;
3454         int nents = 0;
3455         int bufsize = 0;
3456         int entrysize = 0;
3457         int tofit = 0;
3458         int rd_unit = rfs3_readdir_unit;
3459         int prev_len;
3460         int space_left;
3461         int i;
3462         uint_t *namlen = NULL;
3463         char *ndata = NULL;
3464         struct sockaddr *ca;
3465         size_t ret;
3466 
3467         vap = NULL;
3468 
3469         vp = nfs3_fhtovp(&args->dir, exi);
3470 
3471         DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3472             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3473             READDIRPLUS3args *, args);
3474 
3475         if (vp == NULL) {
3476                 error = ESTALE;
3477                 goto out;
3478         }
3479 
3480         if (is_system_labeled()) {
3481                 bslabel_t *clabel = req->rq_label;
3482 
3483                 ASSERT(clabel != NULL);
3484                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3485                     char *, "got client label from request(1)",
3486                     struct svc_req *, req);
3487 
3488                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3489                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3490                             exi)) {
3491                                 resp->status = NFS3ERR_ACCES;
3492                                 goto out1;
3493                         }
3494                 }
3495         }
3496 
3497         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3498 
3499         va.va_mask = AT_ALL;
3500         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3501 
3502         if (vp->v_type != VDIR) {
3503                 error = ENOTDIR;
3504                 goto out;
3505         }
3506 
3507         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3508         if (error)
3509                 goto out;
3510 
3511         /*
3512          * Don't allow arbitrary counts for allocation
3513          */
3514         if (args->maxcount > rfs3_tsize(req))
3515                 args->maxcount = rfs3_tsize(req);
3516 
3517         /*
3518          * Make sure that there is room to read at least one entry
3519          * if any are available
3520          */
3521         args->dircount = MIN(args->dircount, args->maxcount);
3522 
3523         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3524                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3525 
3526         /*
3527          * This allocation relies on a minimum directory entry
3528          * being roughly 24 bytes.  Therefore, the namlen array
3529          * will have enough space based on the maximum number of
3530          * entries to read.
3531          */
3532         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3533 
3534         space_left = args->dircount;
3535         data = kmem_alloc(args->dircount, KM_SLEEP);
3536         dp = (struct dirent64 *)data;
3537         uio.uio_iov = &iov;
3538         uio.uio_iovcnt = 1;
3539         uio.uio_segflg = UIO_SYSSPACE;
3540         uio.uio_extflg = UIO_COPY_CACHED;
3541         uio.uio_loffset = (offset_t)args->cookie;
3542 
3543         /*
3544          * bufsize is used to keep track of the size of the response as we
3545          * get post op attributes and filehandles for each entry.  This is
3546          * an optimization as the server may have read more entries than will
3547          * fit in the buffer specified by maxcount.  We stop calculating
3548          * post op attributes and filehandles once we have exceeded maxcount.
3549          * This will minimize the effect of truncation.
3550          *
3551          * It is primed with:
3552          *      1 for the status +
3553          *      1 for the dir_attributes.attributes boolean +
3554          *      2 for the cookie verifier
3555          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3556          * to bytes.  If there are directory attributes to be
3557          * returned, then:
3558          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3559          * time BYTES_PER_XDR_UNIT is added to account for them.
3560          */
3561         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3562         if (vap != NULL)
3563                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3564 
3565 getmoredents:
3566         /*
3567          * Here we make a check so that our read unit is not larger than
3568          * the space left in the buffer.
3569          */
3570         rd_unit = MIN(rd_unit, space_left);
3571         iov.iov_base = (char *)dp;
3572         iov.iov_len = rd_unit;
3573         uio.uio_resid = rd_unit;
3574         prev_len = rd_unit;
3575 
3576         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3577 
3578         if (error) {
3579                 kmem_free(data, args->dircount);
3580                 goto out;
3581         }
3582 
3583         if (uio.uio_resid == prev_len && !iseof) {
3584                 if (nents == 0) {
3585                         kmem_free(data, args->dircount);
3586                         resp->status = NFS3ERR_TOOSMALL;
3587                         goto out1;
3588                 }
3589 
3590                 /*
3591                  * We could not get any more entries, so get the attributes
3592                  * and filehandle for the entries already obtained.
3593                  */
3594                 goto good;
3595         }
3596 
3597         /*
3598          * We estimate the size of the response by assuming the
3599          * entry exists and attributes and filehandle are also valid
3600          */
3601         for (size = prev_len - uio.uio_resid;
3602             size > 0;
3603             size -= dp->d_reclen, dp = nextdp(dp)) {
3604 
3605                 if (dp->d_ino == 0) {
3606                         nents++;
3607                         continue;
3608                 }
3609 
3610                 namlen[nents] = strlen(dp->d_name);
3611                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3612 
3613                 /*
3614                  * We need to check to see if the number of bytes left
3615                  * to go into the buffer will actually fit into the
3616                  * buffer.  This is calculated as the size of this
3617                  * entry plus:
3618                  *      1 for the true/false list indicator +
3619                  *      1 for the eof indicator
3620                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3621                  * to bytes.
3622                  *
3623                  * Also check the dircount limit against the first entry read
3624                  *
3625                  */
3626                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3627                 if (bufsize + tofit > args->maxcount) {
3628                         /*
3629                          * We make a check here to see if this was the
3630                          * first entry being measured.  If so, then maxcount
3631                          * was too small to begin with and so we need to
3632                          * return with NFS3ERR_TOOSMALL.
3633                          */
3634                         if (nents == 0) {
3635                                 kmem_free(data, args->dircount);
3636                                 resp->status = NFS3ERR_TOOSMALL;
3637                                 goto out1;
3638                         }
3639                         iseof = FALSE;
3640                         goto good;
3641                 }
3642                 bufsize += entrysize;
3643                 nents++;
3644         }
3645 
3646         /*
3647          * If there is enough room to fit at least 1 more entry including
3648          * post op attributes and filehandle in the buffer AND that we haven't
3649          * exceeded dircount then go back and get some more.
3650          */
3651         if (!iseof &&
3652             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3653                 space_left -= (prev_len - uio.uio_resid);
3654                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3655                         goto getmoredents;
3656 
3657                 /* else, fall through */
3658         }
3659 good:
3660         va.va_mask = AT_ALL;
3661         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3662 
3663         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3664 
3665         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3666         resp->resok.infop = infop;
3667 
3668         dp = (struct dirent64 *)data;
3669         for (i = 0; i < nents; i++) {
3670 
3671                 if (dp->d_ino == 0) {
3672                         infop[i].attr.attributes = FALSE;
3673                         infop[i].fh.handle_follows = FALSE;
3674                         dp = nextdp(dp);
3675                         continue;
3676                 }
3677 
3678                 infop[i].namelen = namlen[i];
3679 
3680                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3681                     NULL, NULL, NULL);
3682                 if (error) {
3683                         infop[i].attr.attributes = FALSE;
3684                         infop[i].fh.handle_follows = FALSE;
3685                         dp = nextdp(dp);
3686                         continue;
3687                 }
3688 
3689                 nva.va_mask = AT_ALL;
3690                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3691 
3692                 /* Lie about the object type for a referral */
3693                 if (vn_is_nfs_reparse(nvp, cr))
3694                         nvap->va_type = VLNK;
3695 
3696                 if (vn_ismntpt(nvp)) {
3697                         infop[i].attr.attributes = FALSE;
3698                         infop[i].fh.handle_follows = FALSE;
3699                 } else {
3700                         vattr_to_post_op_attr(nvap, &infop[i].attr);
3701 
3702                         error = makefh3(&infop[i].fh.handle, nvp, exi);
3703                         if (!error)
3704                                 infop[i].fh.handle_follows = TRUE;
3705                         else
3706                                 infop[i].fh.handle_follows = FALSE;
3707                 }
3708 
3709                 VN_RELE(nvp);
3710                 dp = nextdp(dp);
3711         }
3712 
3713         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3714         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3715         if (ndata == NULL)
3716                 ndata = data;
3717 
3718         if (ret > 0) {
3719                 /*
3720                  * We had to drop one or more entries in order to fit
3721                  * during the character conversion.  We need to patch
3722                  * up the size and eof info.
3723                  */
3724                 if (iseof)
3725                         iseof = FALSE;
3726 
3727                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3728                     nents, ret);
3729         }
3730 
3731 
3732 #if 0 /* notyet */
3733         /*
3734          * Don't do this.  It causes local disk writes when just
3735          * reading the file and the overhead is deemed larger
3736          * than the benefit.
3737          */
3738         /*
3739          * Force modified metadata out to stable storage.
3740          */
3741         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3742 #endif
3743 
3744         kmem_free(namlen, args->dircount);
3745 
3746         resp->status = NFS3_OK;
3747         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3748         resp->resok.cookieverf = 0;
3749         resp->resok.reply.entries = (entryplus3 *)ndata;
3750         resp->resok.reply.eof = iseof;
3751         resp->resok.size = nents;
3752         resp->resok.count = args->dircount - ret;
3753         resp->resok.maxcount = args->maxcount;
3754 
3755         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3756             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3757             READDIRPLUS3res *, resp);
3758 
3759         VN_RELE(vp);
3760 
3761         return;
3762 
3763 out:
3764         if (curthread->t_flag & T_WOULDBLOCK) {
3765                 curthread->t_flag &= ~T_WOULDBLOCK;
3766                 resp->status = NFS3ERR_JUKEBOX;
3767         } else {
3768                 resp->status = puterrno3(error);
3769         }
3770 out1:
3771         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3772 
3773         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3774             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3775             READDIRPLUS3res *, resp);
3776 
3777         if (vp != NULL) {
3778                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3779                 VN_RELE(vp);
3780         }
3781 
3782         if (namlen != NULL)
3783                 kmem_free(namlen, args->dircount);
3784 
3785         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3786 }
3787 
3788 void *
3789 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3790 {
3791 
3792         return (&args->dir);
3793 }
3794 
3795 void
3796 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3797 {
3798 
3799         if (resp->status == NFS3_OK) {
3800                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3801                 kmem_free(resp->resok.infop,
3802                     resp->resok.size * sizeof (struct entryplus3_info));
3803         }
3804 }
3805 
3806 /* ARGSUSED */
3807 void
3808 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3809     struct svc_req *req, cred_t *cr, bool_t ro)
3810 {
3811         int error;
3812         vnode_t *vp;
3813         struct vattr *vap;
3814         struct vattr va;
3815         struct statvfs64 sb;
3816 
3817         vap = NULL;
3818 
3819         vp = nfs3_fhtovp(&args->fsroot, exi);
3820 
3821         DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3822             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3823             FSSTAT3args *, args);
3824 
3825         if (vp == NULL) {
3826                 error = ESTALE;
3827                 goto out;
3828         }
3829 
3830         if (is_system_labeled()) {
3831                 bslabel_t *clabel = req->rq_label;
3832 
3833                 ASSERT(clabel != NULL);
3834                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3835                     "got client label from request(1)", struct svc_req *, req);
3836 
3837                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3838                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3839                             exi)) {
3840                                 resp->status = NFS3ERR_ACCES;
3841                                 goto out1;
3842                         }
3843                 }
3844         }
3845 
3846         error = VFS_STATVFS(vp->v_vfsp, &sb);
3847 
3848         va.va_mask = AT_ALL;
3849         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3850 
3851         if (error)
3852                 goto out;
3853 
3854         resp->status = NFS3_OK;
3855         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3856         if (sb.f_blocks != (fsblkcnt64_t)-1)
3857                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3858         else
3859                 resp->resok.tbytes = (size3)sb.f_blocks;
3860         if (sb.f_bfree != (fsblkcnt64_t)-1)
3861                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3862         else
3863                 resp->resok.fbytes = (size3)sb.f_bfree;
3864         if (sb.f_bavail != (fsblkcnt64_t)-1)
3865                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3866         else
3867                 resp->resok.abytes = (size3)sb.f_bavail;
3868         resp->resok.tfiles = (size3)sb.f_files;
3869         resp->resok.ffiles = (size3)sb.f_ffree;
3870         resp->resok.afiles = (size3)sb.f_favail;
3871         resp->resok.invarsec = 0;
3872 
3873         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3874             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3875             FSSTAT3res *, resp);
3876         VN_RELE(vp);
3877 
3878         return;
3879 
3880 out:
3881         if (curthread->t_flag & T_WOULDBLOCK) {
3882                 curthread->t_flag &= ~T_WOULDBLOCK;
3883                 resp->status = NFS3ERR_JUKEBOX;
3884         } else
3885                 resp->status = puterrno3(error);
3886 out1:
3887         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3888             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3889             FSSTAT3res *, resp);
3890 
3891         if (vp != NULL)
3892                 VN_RELE(vp);
3893         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3894 }
3895 
3896 void *
3897 rfs3_fsstat_getfh(FSSTAT3args *args)
3898 {
3899 
3900         return (&args->fsroot);
3901 }
3902 
3903 /* ARGSUSED */
3904 void
3905 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3906     struct svc_req *req, cred_t *cr, bool_t ro)
3907 {
3908         vnode_t *vp;
3909         struct vattr *vap;
3910         struct vattr va;
3911         uint32_t xfer_size;
3912         ulong_t l = 0;
3913         int error;
3914 
3915         vp = nfs3_fhtovp(&args->fsroot, exi);
3916 
3917         DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3918             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3919             FSINFO3args *, args);
3920 
3921         if (vp == NULL) {
3922                 if (curthread->t_flag & T_WOULDBLOCK) {
3923                         curthread->t_flag &= ~T_WOULDBLOCK;
3924                         resp->status = NFS3ERR_JUKEBOX;
3925                 } else
3926                         resp->status = NFS3ERR_STALE;
3927                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3928                 goto out;
3929         }
3930 
3931         if (is_system_labeled()) {
3932                 bslabel_t *clabel = req->rq_label;
3933 
3934                 ASSERT(clabel != NULL);
3935                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3936                     "got client label from request(1)", struct svc_req *, req);
3937 
3938                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3939                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3940                             exi)) {
3941                                 resp->status = NFS3ERR_STALE;
3942                                 vattr_to_post_op_attr(NULL,
3943                                     &resp->resfail.obj_attributes);
3944                                 goto out;
3945                         }
3946                 }
3947         }
3948 
3949         va.va_mask = AT_ALL;
3950         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3951 
3952         resp->status = NFS3_OK;
3953         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3954         xfer_size = rfs3_tsize(req);
3955         resp->resok.rtmax = xfer_size;
3956         resp->resok.rtpref = xfer_size;
3957         resp->resok.rtmult = DEV_BSIZE;
3958         resp->resok.wtmax = xfer_size;
3959         resp->resok.wtpref = xfer_size;
3960         resp->resok.wtmult = DEV_BSIZE;
3961         resp->resok.dtpref = MAXBSIZE;
3962 
3963         /*
3964          * Large file spec: want maxfilesize based on limit of
3965          * underlying filesystem.  We can guess 2^31-1 if need be.
3966          */
3967         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3968         if (error) {
3969                 resp->status = puterrno3(error);
3970                 goto out;
3971         }
3972 
3973         /*
3974          * If the underlying file system does not support _PC_FILESIZEBITS,
3975          * return a reasonable default. Note that error code on VOP_PATHCONF
3976          * will be 0, even if the underlying file system does not support
3977          * _PC_FILESIZEBITS.
3978          */
3979         if (l == (ulong_t)-1) {
3980                 resp->resok.maxfilesize = MAXOFF32_T;
3981         } else {
3982                 if (l >= (sizeof (uint64_t) * 8))
3983                         resp->resok.maxfilesize = INT64_MAX;
3984                 else
3985                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3986         }
3987 
3988         resp->resok.time_delta.seconds = 0;
3989         resp->resok.time_delta.nseconds = 1000;
3990         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3991             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3992 
3993         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3994             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3995             FSINFO3res *, resp);
3996 
3997         VN_RELE(vp);
3998 
3999         return;
4000 
4001 out:
4002         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
4003             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
4004             FSINFO3res *, resp);
4005         if (vp != NULL)
4006                 VN_RELE(vp);
4007 }
4008 
4009 void *
4010 rfs3_fsinfo_getfh(FSINFO3args *args)
4011 {
4012         return (&args->fsroot);
4013 }
4014 
4015 /* ARGSUSED */
4016 void
4017 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4018     struct svc_req *req, cred_t *cr, bool_t ro)
4019 {
4020         int error;
4021         vnode_t *vp;
4022         struct vattr *vap;
4023         struct vattr va;
4024         ulong_t val;
4025 
4026         vap = NULL;
4027 
4028         vp = nfs3_fhtovp(&args->object, exi);
4029 
4030         DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4031             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4032             PATHCONF3args *, args);
4033 
4034         if (vp == NULL) {
4035                 error = ESTALE;
4036                 goto out;
4037         }
4038 
4039         if (is_system_labeled()) {
4040                 bslabel_t *clabel = req->rq_label;
4041 
4042                 ASSERT(clabel != NULL);
4043                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4044                     "got client label from request(1)", struct svc_req *, req);
4045 
4046                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4047                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4048                             exi)) {
4049                                 resp->status = NFS3ERR_ACCES;
4050                                 goto out1;
4051                         }
4052                 }
4053         }
4054 
4055         va.va_mask = AT_ALL;
4056         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4057 
4058         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4059         if (error)
4060                 goto out;
4061         resp->resok.info.link_max = (uint32)val;
4062 
4063         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4064         if (error)
4065                 goto out;
4066         resp->resok.info.name_max = (uint32)val;
4067 
4068         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4069         if (error)
4070                 goto out;
4071         if (val == 1)
4072                 resp->resok.info.no_trunc = TRUE;
4073         else
4074                 resp->resok.info.no_trunc = FALSE;
4075 
4076         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4077         if (error)
4078                 goto out;
4079         if (val == 1)
4080                 resp->resok.info.chown_restricted = TRUE;
4081         else
4082                 resp->resok.info.chown_restricted = FALSE;
4083 
4084         resp->status = NFS3_OK;
4085         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4086         resp->resok.info.case_insensitive = FALSE;
4087         resp->resok.info.case_preserving = TRUE;
4088         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4089             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4090             PATHCONF3res *, resp);
4091         VN_RELE(vp);
4092         return;
4093 
4094 out:
4095         if (curthread->t_flag & T_WOULDBLOCK) {
4096                 curthread->t_flag &= ~T_WOULDBLOCK;
4097                 resp->status = NFS3ERR_JUKEBOX;
4098         } else
4099                 resp->status = puterrno3(error);
4100 out1:
4101         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4102             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4103             PATHCONF3res *, resp);
4104         if (vp != NULL)
4105                 VN_RELE(vp);
4106         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4107 }
4108 
4109 void *
4110 rfs3_pathconf_getfh(PATHCONF3args *args)
4111 {
4112 
4113         return (&args->object);
4114 }
4115 
4116 void
4117 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4118     struct svc_req *req, cred_t *cr, bool_t ro)
4119 {
4120         nfs3_srv_t *ns;
4121         int error;
4122         vnode_t *vp;
4123         struct vattr *bvap;
4124         struct vattr bva;
4125         struct vattr *avap;
4126         struct vattr ava;
4127 
4128         bvap = NULL;
4129         avap = NULL;
4130 
4131         vp = nfs3_fhtovp(&args->file, exi);
4132 
4133         DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4134             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4135             COMMIT3args *, args);
4136 
4137         if (vp == NULL) {
4138                 error = ESTALE;
4139                 goto out;
4140         }
4141 
4142         ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
4143         ns = nfs3_get_srv();
4144         bva.va_mask = AT_ALL;
4145         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4146 
4147         /*
4148          * If we can't get the attributes, then we can't do the
4149          * right access checking.  So, we'll fail the request.
4150          */
4151         if (error)
4152                 goto out;
4153 
4154         bvap = &bva;
4155 
4156         if (rdonly(ro, vp)) {
4157                 resp->status = NFS3ERR_ROFS;
4158                 goto out1;
4159         }
4160 
4161         if (vp->v_type != VREG) {
4162                 resp->status = NFS3ERR_INVAL;
4163                 goto out1;
4164         }
4165 
4166         if (is_system_labeled()) {
4167                 bslabel_t *clabel = req->rq_label;
4168 
4169                 ASSERT(clabel != NULL);
4170                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4171                     "got client label from request(1)", struct svc_req *, req);
4172 
4173                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4174                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4175                             exi)) {
4176                                 resp->status = NFS3ERR_ACCES;
4177                                 goto out1;
4178                         }
4179                 }
4180         }
4181 
4182         if (crgetuid(cr) != bva.va_uid &&
4183             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4184                 goto out;
4185 
4186         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4187 
4188         ava.va_mask = AT_ALL;
4189         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4190 
4191         if (error)
4192                 goto out;
4193 
4194         resp->status = NFS3_OK;
4195         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4196         resp->resok.verf = ns->write3verf;
4197 
4198         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4199             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4200             COMMIT3res *, resp);
4201 
4202         VN_RELE(vp);
4203 
4204         return;
4205 
4206 out:
4207         if (curthread->t_flag & T_WOULDBLOCK) {
4208                 curthread->t_flag &= ~T_WOULDBLOCK;
4209                 resp->status = NFS3ERR_JUKEBOX;
4210         } else
4211                 resp->status = puterrno3(error);
4212 out1:
4213         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4214             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4215             COMMIT3res *, resp);
4216 
4217         if (vp != NULL)
4218                 VN_RELE(vp);
4219         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4220 }
4221 
4222 void *
4223 rfs3_commit_getfh(COMMIT3args *args)
4224 {
4225 
4226         return (&args->file);
4227 }
4228 
4229 static int
4230 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4231 {
4232 
4233         vap->va_mask = 0;
4234 
4235         if (sap->mode.set_it) {
4236                 vap->va_mode = (mode_t)sap->mode.mode;
4237                 vap->va_mask |= AT_MODE;
4238         }
4239         if (sap->uid.set_it) {
4240                 vap->va_uid = (uid_t)sap->uid.uid;
4241                 vap->va_mask |= AT_UID;
4242         }
4243         if (sap->gid.set_it) {
4244                 vap->va_gid = (gid_t)sap->gid.gid;
4245                 vap->va_mask |= AT_GID;
4246         }
4247         if (sap->size.set_it) {
4248                 if (sap->size.size > (size3)((u_longlong_t)-1))
4249                         return (EINVAL);
4250                 vap->va_size = sap->size.size;
4251                 vap->va_mask |= AT_SIZE;
4252         }
4253         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4254 #ifndef _LP64
4255                 /* check time validity */
4256                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4257                         return (EOVERFLOW);
4258 #endif
4259                 /*
4260                  * nfs protocol defines times as unsigned so don't extend sign,
4261                  * unless sysadmin set nfs_allow_preepoch_time.
4262                  */
4263                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4264                     sap->atime.atime.seconds);
4265                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4266                 vap->va_mask |= AT_ATIME;
4267         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4268                 gethrestime(&vap->va_atime);
4269                 vap->va_mask |= AT_ATIME;
4270         }
4271         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4272 #ifndef _LP64
4273                 /* check time validity */
4274                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4275                         return (EOVERFLOW);
4276 #endif
4277                 /*
4278                  * nfs protocol defines times as unsigned so don't extend sign,
4279                  * unless sysadmin set nfs_allow_preepoch_time.
4280                  */
4281                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4282                     sap->mtime.mtime.seconds);
4283                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4284                 vap->va_mask |= AT_MTIME;
4285         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4286                 gethrestime(&vap->va_mtime);
4287                 vap->va_mask |= AT_MTIME;
4288         }
4289 
4290         return (0);
4291 }
4292 
/*
 * Translation table from a vnode type to the NFSv3 file type (ftype3);
 * vattr_to_fattr3() indexes it with va_type.  Entries of 0 stand for vnode
 * types that have no NFSv3 file type in this table.
 * NOTE(review): the slot order is presumed to follow the vtype enum
 * (VNON first, VBAD last) - confirm against the vtype definition.
 */
static const ftype3 vt_to_nf3[] = {
        0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
};
4296 
4297 static int
4298 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4299 {
4300 
4301         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4302         /* Return error if time or size overflow */
4303         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4304                 return (EOVERFLOW);
4305         }
4306         fap->type = vt_to_nf3[vap->va_type];
4307         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4308         fap->nlink = (uint32)vap->va_nlink;
4309         if (vap->va_uid == UID_NOBODY)
4310                 fap->uid = (uid3)NFS_UID_NOBODY;
4311         else
4312                 fap->uid = (uid3)vap->va_uid;
4313         if (vap->va_gid == GID_NOBODY)
4314                 fap->gid = (gid3)NFS_GID_NOBODY;
4315         else
4316                 fap->gid = (gid3)vap->va_gid;
4317         fap->size = (size3)vap->va_size;
4318         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4319         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4320         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4321         fap->fsid = (uint64)vap->va_fsid;
4322         fap->fileid = (fileid3)vap->va_nodeid;
4323         fap->atime.seconds = vap->va_atime.tv_sec;
4324         fap->atime.nseconds = vap->va_atime.tv_nsec;
4325         fap->mtime.seconds = vap->va_mtime.tv_sec;
4326         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4327         fap->ctime.seconds = vap->va_ctime.tv_sec;
4328         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4329         return (0);
4330 }
4331 
4332 static int
4333 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4334 {
4335 
4336         /* Return error if time or size overflow */
4337         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4338             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4339             NFS3_SIZE_OK(vap->va_size))) {
4340                 return (EOVERFLOW);
4341         }
4342         wccap->size = (size3)vap->va_size;
4343         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4344         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4345         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4346         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4347         return (0);
4348 }
4349 
4350 static void
4351 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4352 {
4353 
4354         /* don't return attrs if time overflow */
4355         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4356                 poap->attributes = TRUE;
4357         } else
4358                 poap->attributes = FALSE;
4359 }
4360 
4361 void
4362 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4363 {
4364 
4365         /* don't return attrs if time overflow */
4366         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4367                 poap->attributes = TRUE;
4368         } else
4369                 poap->attributes = FALSE;
4370 }
4371 
4372 static void
4373 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4374 {
4375         vattr_to_pre_op_attr(bvap, &wccp->before);
4376         vattr_to_post_op_attr(avap, &wccp->after);
4377 }
4378 
4379 static int
4380 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4381 {
4382         struct clist    *wcl;
4383         int             wlist_len;
4384         count3          count = rok->count;
4385 
4386         wcl = args->wlist;
4387         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4388                 return (FALSE);
4389 
4390         wcl = args->wlist;
4391         rok->wlist_len = wlist_len;
4392         rok->wlist = wcl;
4393         return (TRUE);
4394 }
4395 
4396 void
4397 rfs3_srv_zone_init(nfs_globals_t *ng)
4398 {
4399         nfs3_srv_t *ns;
4400         struct rfs3_verf_overlay {
4401                 uint_t id; /* a "unique" identifier */
4402                 int ts; /* a unique timestamp */
4403         } *verfp;
4404         timestruc_t now;
4405 
4406         ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4407 
4408         /*
4409          * The following algorithm attempts to find a unique verifier
4410          * to be used as the write verifier returned from the server
4411          * to the client.  It is important that this verifier change
4412          * whenever the server reboots.  Of secondary importance, it
4413          * is important for the verifier to be unique between two
4414          * different servers.
4415          *
4416          * Thus, an attempt is made to use the system hostid and the
4417          * current time in seconds when the nfssrv kernel module is
4418          * loaded.  It is assumed that an NFS server will not be able
4419          * to boot and then to reboot in less than a second.  If the
4420          * hostid has not been set, then the current high resolution
4421          * time is used.  This will ensure different verifiers each
4422          * time the server reboots and minimize the chances that two
4423          * different servers will have the same verifier.
4424          */
4425 
4426 #ifndef lint
4427         /*
4428          * We ASSERT that this constant logic expression is
4429          * always true because in the past, it wasn't.
4430          */
4431         ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4432 #endif
4433 
4434         gethrestime(&now);
4435         verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4436         verfp->ts = (int)now.tv_sec;
4437         verfp->id = zone_get_hostid(NULL);
4438 
4439         if (verfp->id == 0)
4440                 verfp->id = (uint_t)now.tv_nsec;
4441 
4442         ng->nfs3_srv = ns;
4443 }
4444 
4445 void
4446 rfs3_srv_zone_fini(nfs_globals_t *ng)
4447 {
4448         nfs3_srv_t *ns = ng->nfs3_srv;
4449 
4450         ng->nfs3_srv = NULL;
4451 
4452         kmem_free(ns, sizeof (*ns));
4453 }
4454 
/*
 * NFSv3 server initialization: request a new caller id from
 * fs_new_caller_id() and record it in nfs3_srv_caller_id.
 */
void
rfs3_srvrinit(void)
{
        nfs3_srv_caller_id = fs_new_caller_id();
}
4460 
/*
 * Counterpart of rfs3_srvrinit().  That function only records a caller
 * id, so nothing has to be undone here.
 */
void
rfs3_srvrfini(void)
{
        /* Nothing to do */
}