1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2018 Nexenta Systems, Inc.
  24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 
  28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /* All Rights Reserved */
  30 
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/buf.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/uio.h>
  40 #include <sys/errno.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/statvfs.h>
  43 #include <sys/kmem.h>
  44 #include <sys/dirent.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/systeminfo.h>
  48 #include <sys/flock.h>
  49 #include <sys/nbmlock.h>
  50 #include <sys/policy.h>
  51 #include <sys/sdt.h>
  52 
  53 #include <rpc/types.h>
  54 #include <rpc/auth.h>
  55 #include <rpc/svc.h>
  56 #include <rpc/rpc_rdma.h>
  57 
  58 #include <nfs/nfs.h>
  59 #include <nfs/export.h>
  60 #include <nfs/nfs_cmd.h>
  61 
  62 #include <sys/strsubr.h>
  63 #include <sys/tsol/label.h>
  64 #include <sys/tsol/tndb.h>
  65 
  66 #include <sys/zone.h>
  67 
  68 #include <inet/ip.h>
  69 #include <inet/ip6.h>
  70 
  71 /*
  72  * Zone global variables of NFSv3 server
  73  */
  74 typedef struct nfs3_srv {
  75         writeverf3      write3verf;
  76 } nfs3_srv_t;
  77 
  78 /*
  79  * These are the interface routines for the server side of the
  80  * Network File System.  See the NFS version 3 protocol specification
  81  * for a description of this interface.
  82  */
  83 
  84 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  85 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  86 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  87 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  88 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  89 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  90 
  91 extern int nfs_loaned_buffers;
  92 
  93 u_longlong_t nfs3_srv_caller_id;
  94 
  95 static nfs3_srv_t *
  96 nfs3_get_srv(void)
  97 {
  98         nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
  99         nfs3_srv_t *srv = ng->nfs3_srv;
 100         ASSERT(srv != NULL);
 101         return (srv);
 102 }
 103 
 104 /* ARGSUSED */
 105 void
 106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
 107     struct svc_req *req, cred_t *cr, bool_t ro)
 108 {
 109         int error;
 110         vnode_t *vp;
 111         struct vattr va;
 112 
 113         vp = nfs3_fhtovp(&args->object, exi);
 114 
 115         DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
 116             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 117             GETATTR3args *, args);
 118 
 119         if (vp == NULL) {
 120                 error = ESTALE;
 121                 goto out;
 122         }
 123 
 124         va.va_mask = AT_ALL;
 125         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 126 
 127         if (!error) {
 128                 /* Lie about the object type for a referral */
 129                 if (vn_is_nfs_reparse(vp, cr))
 130                         va.va_type = VLNK;
 131 
 132                 /* overflow error if time or size is out of range */
 133                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 134                 if (error)
 135                         goto out;
 136                 resp->status = NFS3_OK;
 137 
 138                 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 139                     cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 140                     GETATTR3res *, resp);
 141 
 142                 VN_RELE(vp);
 143 
 144                 return;
 145         }
 146 
 147 out:
 148         if (curthread->t_flag & T_WOULDBLOCK) {
 149                 curthread->t_flag &= ~T_WOULDBLOCK;
 150                 resp->status = NFS3ERR_JUKEBOX;
 151         } else
 152                 resp->status = puterrno3(error);
 153 
 154         DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 155             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 156             GETATTR3res *, resp);
 157 
 158         if (vp != NULL)
 159                 VN_RELE(vp);
 160 }
 161 
 162 void *
 163 rfs3_getattr_getfh(GETATTR3args *args)
 164 {
 165 
 166         return (&args->object);
 167 }
 168 
 169 void
 170 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 171     struct svc_req *req, cred_t *cr, bool_t ro)
 172 {
 173         int error;
 174         vnode_t *vp;
 175         struct vattr *bvap;
 176         struct vattr bva;
 177         struct vattr *avap;
 178         struct vattr ava;
 179         int flag;
 180         int in_crit = 0;
 181         struct flock64 bf;
 182         caller_context_t ct;
 183 
 184         bvap = NULL;
 185         avap = NULL;
 186 
 187         vp = nfs3_fhtovp(&args->object, exi);
 188 
 189         DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
 190             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 191             SETATTR3args *, args);
 192 
 193         if (vp == NULL) {
 194                 error = ESTALE;
 195                 goto out;
 196         }
 197 
 198         error = sattr3_to_vattr(&args->new_attributes, &ava);
 199         if (error)
 200                 goto out;
 201 
 202         if (is_system_labeled()) {
 203                 bslabel_t *clabel = req->rq_label;
 204 
 205                 ASSERT(clabel != NULL);
 206                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 207                     "got client label from request(1)", struct svc_req *, req);
 208 
 209                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 210                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 211                             exi)) {
 212                                 resp->status = NFS3ERR_ACCES;
 213                                 goto out1;
 214                         }
 215                 }
 216         }
 217 
 218         /*
 219          * We need to specially handle size changes because of
 220          * possible conflicting NBMAND locks. Get into critical
 221          * region before VOP_GETATTR, so the size attribute is
 222          * valid when checking conflicts.
 223          *
 224          * Also, check to see if the v4 side of the server has
 225          * delegated this file.  If so, then we return JUKEBOX to
 226          * allow the client to retrasmit its request.
 227          */
 228         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 229                 if (nbl_need_check(vp)) {
 230                         nbl_start_crit(vp, RW_READER);
 231                         in_crit = 1;
 232                 }
 233         }
 234 
 235         bva.va_mask = AT_ALL;
 236         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 237 
 238         /*
 239          * If we can't get the attributes, then we can't do the
 240          * right access checking.  So, we'll fail the request.
 241          */
 242         if (error)
 243                 goto out;
 244 
 245         bvap = &bva;
 246 
 247         if (rdonly(ro, vp)) {
 248                 resp->status = NFS3ERR_ROFS;
 249                 goto out1;
 250         }
 251 
 252         if (args->guard.check &&
 253             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 254             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 255                 resp->status = NFS3ERR_NOT_SYNC;
 256                 goto out1;
 257         }
 258 
 259         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 260                 flag = ATTR_UTIME;
 261         else
 262                 flag = 0;
 263 
 264         /*
 265          * If the filesystem is exported with nosuid, then mask off
 266          * the setuid and setgid bits.
 267          */
 268         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 269             (exi->exi_export.ex_flags & EX_NOSUID))
 270                 ava.va_mode &= ~(VSUID | VSGID);
 271 
 272         ct.cc_sysid = 0;
 273         ct.cc_pid = 0;
 274         ct.cc_caller_id = nfs3_srv_caller_id;
 275         ct.cc_flags = CC_DONTBLOCK;
 276 
 277         /*
 278          * We need to specially handle size changes because it is
 279          * possible for the client to create a file with modes
 280          * which indicate read-only, but with the file opened for
 281          * writing.  If the client then tries to set the size of
 282          * the file, then the normal access checking done in
 283          * VOP_SETATTR would prevent the client from doing so,
 284          * although it should be legal for it to do so.  To get
 285          * around this, we do the access checking for ourselves
 286          * and then use VOP_SPACE which doesn't do the access
 287          * checking which VOP_SETATTR does. VOP_SPACE can only
 288          * operate on VREG files, let VOP_SETATTR handle the other
 289          * extremely rare cases.
 290          * Also the client should not be allowed to change the
 291          * size of the file if there is a conflicting non-blocking
 292          * mandatory lock in the region the change.
 293          */
 294         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 295                 if (in_crit) {
 296                         u_offset_t offset;
 297                         ssize_t length;
 298 
 299                         if (ava.va_size < bva.va_size) {
 300                                 offset = ava.va_size;
 301                                 length = bva.va_size - ava.va_size;
 302                         } else {
 303                                 offset = bva.va_size;
 304                                 length = ava.va_size - bva.va_size;
 305                         }
 306                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 307                             NULL)) {
 308                                 error = EACCES;
 309                                 goto out;
 310                         }
 311                 }
 312 
 313                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 314                         ava.va_mask &= ~AT_SIZE;
 315                         bf.l_type = F_WRLCK;
 316                         bf.l_whence = 0;
 317                         bf.l_start = (off64_t)ava.va_size;
 318                         bf.l_len = 0;
 319                         bf.l_sysid = 0;
 320                         bf.l_pid = 0;
 321                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 322                             (offset_t)ava.va_size, cr, &ct);
 323                 }
 324         }
 325 
 326         if (!error && ava.va_mask)
 327                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 328 
 329         /* check if a monitor detected a delegation conflict */
 330         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 331                 resp->status = NFS3ERR_JUKEBOX;
 332                 goto out1;
 333         }
 334 
 335         ava.va_mask = AT_ALL;
 336         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 337 
 338         /*
 339          * Force modified metadata out to stable storage.
 340          */
 341         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 342 
 343         if (error)
 344                 goto out;
 345 
 346         if (in_crit)
 347                 nbl_end_crit(vp);
 348 
 349         resp->status = NFS3_OK;
 350         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 351 
 352         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 353             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 354             SETATTR3res *, resp);
 355 
 356         VN_RELE(vp);
 357 
 358         return;
 359 
 360 out:
 361         if (curthread->t_flag & T_WOULDBLOCK) {
 362                 curthread->t_flag &= ~T_WOULDBLOCK;
 363                 resp->status = NFS3ERR_JUKEBOX;
 364         } else
 365                 resp->status = puterrno3(error);
 366 out1:
 367         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 368             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 369             SETATTR3res *, resp);
 370 
 371         if (vp != NULL) {
 372                 if (in_crit)
 373                         nbl_end_crit(vp);
 374                 VN_RELE(vp);
 375         }
 376         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 377 }
 378 
 379 void *
 380 rfs3_setattr_getfh(SETATTR3args *args)
 381 {
 382 
 383         return (&args->object);
 384 }
 385 
 386 /* ARGSUSED */
 387 void
 388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 389     struct svc_req *req, cred_t *cr, bool_t ro)
 390 {
 391         int error;
 392         vnode_t *vp;
 393         vnode_t *dvp;
 394         struct vattr *vap;
 395         struct vattr va;
 396         struct vattr *dvap;
 397         struct vattr dva;
 398         nfs_fh3 *fhp;
 399         struct sec_ol sec = {0, 0};
 400         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 401         struct sockaddr *ca;
 402         char *name = NULL;
 403 
 404         dvap = NULL;
 405 
 406         if (exi != NULL)
 407                 exi_hold(exi);
 408 
 409         /*
 410          * Allow lookups from the root - the default
 411          * location of the public filehandle.
 412          */
 413         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 414                 dvp = ZONE_ROOTVP();
 415                 VN_HOLD(dvp);
 416 
 417                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 418                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 419                     LOOKUP3args *, args);
 420         } else {
 421                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 422 
 423                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 424                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 425                     LOOKUP3args *, args);
 426 
 427                 if (dvp == NULL) {
 428                         error = ESTALE;
 429                         goto out;
 430                 }
 431         }
 432 
 433         dva.va_mask = AT_ALL;
 434         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 435 
 436         if (args->what.name == nfs3nametoolong) {
 437                 resp->status = NFS3ERR_NAMETOOLONG;
 438                 goto out1;
 439         }
 440 
 441         if (args->what.name == NULL || *(args->what.name) == '\0') {
 442                 resp->status = NFS3ERR_ACCES;
 443                 goto out1;
 444         }
 445 
 446         fhp = &args->what.dir;
 447         if (strcmp(args->what.name, "..") == 0 &&
 448             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 449                 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 450                     (dvp->v_flag & VROOT)) {
 451                         /*
 452                          * special case for ".." and 'nohide'exported root
 453                          */
 454                         if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 455                                 resp->status = NFS3ERR_ACCES;
 456                                 goto out1;
 457                         }
 458                 } else {
 459                         resp->status = NFS3ERR_NOENT;
 460                         goto out1;
 461                 }
 462         }
 463 
 464         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 465         name = nfscmd_convname(ca, exi, args->what.name,
 466             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 467 
 468         if (name == NULL) {
 469                 resp->status = NFS3ERR_ACCES;
 470                 goto out1;
 471         }
 472 
 473         /*
 474          * If the public filehandle is used then allow
 475          * a multi-component lookup
 476          */
 477         if (PUBLIC_FH3(&args->what.dir)) {
 478                 publicfh_flag = TRUE;
 479 
 480                 exi_rele(exi);
 481 
 482                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 483                     &exi, &sec);
 484 
 485                 /*
 486                  * Since WebNFS may bypass MOUNT, we need to ensure this
 487                  * request didn't come from an unlabeled admin_low client.
 488                  */
 489                 if (is_system_labeled() && error == 0) {
 490                         int             addr_type;
 491                         void            *ipaddr;
 492                         tsol_tpc_t      *tp;
 493 
 494                         if (ca->sa_family == AF_INET) {
 495                                 addr_type = IPV4_VERSION;
 496                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 497                         } else if (ca->sa_family == AF_INET6) {
 498                                 addr_type = IPV6_VERSION;
 499                                 ipaddr = &((struct sockaddr_in6 *)
 500                                     ca)->sin6_addr;
 501                         }
 502                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 503                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 504                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 505                             SUN_CIPSO) {
 506                                 VN_RELE(vp);
 507                                 error = EACCES;
 508                         }
 509                         if (tp != NULL)
 510                                 TPC_RELE(tp);
 511                 }
 512         } else {
 513                 error = VOP_LOOKUP(dvp, name, &vp,
 514                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 515         }
 516 
 517         if (name != args->what.name)
 518                 kmem_free(name, MAXPATHLEN + 1);
 519 
 520         if (error == 0 && vn_ismntpt(vp)) {
 521                 error = rfs_cross_mnt(&vp, &exi);
 522                 if (error)
 523                         VN_RELE(vp);
 524         }
 525 
 526         if (is_system_labeled() && error == 0) {
 527                 bslabel_t *clabel = req->rq_label;
 528 
 529                 ASSERT(clabel != NULL);
 530                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 531                     "got client label from request(1)", struct svc_req *, req);
 532 
 533                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 534                         if (!do_rfs_label_check(clabel, dvp,
 535                             DOMINANCE_CHECK, exi)) {
 536                                 VN_RELE(vp);
 537                                 error = EACCES;
 538                         }
 539                 }
 540         }
 541 
 542         dva.va_mask = AT_ALL;
 543         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 544 
 545         if (error)
 546                 goto out;
 547 
 548         if (sec.sec_flags & SEC_QUERY) {
 549                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 550         } else {
 551                 error = makefh3(&resp->resok.object, vp, exi);
 552                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 553                         auth_weak = TRUE;
 554         }
 555 
 556         if (error) {
 557                 VN_RELE(vp);
 558                 goto out;
 559         }
 560 
 561         va.va_mask = AT_ALL;
 562         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 563 
 564         exi_rele(exi);
 565         VN_RELE(vp);
 566 
 567         resp->status = NFS3_OK;
 568         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 569         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 570 
 571         /*
 572          * If it's public fh, no 0x81, and client's flavor is
 573          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 574          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 575          */
 576         if (auth_weak)
 577                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 578 
 579         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 580             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 581             LOOKUP3res *, resp);
 582         VN_RELE(dvp);
 583 
 584         return;
 585 
 586 out:
 587         if (curthread->t_flag & T_WOULDBLOCK) {
 588                 curthread->t_flag &= ~T_WOULDBLOCK;
 589                 resp->status = NFS3ERR_JUKEBOX;
 590         } else
 591                 resp->status = puterrno3(error);
 592 out1:
 593         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 594             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 595             LOOKUP3res *, resp);
 596 
 597         if (exi != NULL)
 598                 exi_rele(exi);
 599 
 600         if (dvp != NULL)
 601                 VN_RELE(dvp);
 602         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 603 
 604 }
 605 
 606 void *
 607 rfs3_lookup_getfh(LOOKUP3args *args)
 608 {
 609 
 610         return (&args->what.dir);
 611 }
 612 
 613 /* ARGSUSED */
 614 void
 615 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 616     struct svc_req *req, cred_t *cr, bool_t ro)
 617 {
 618         int error;
 619         vnode_t *vp;
 620         struct vattr *vap;
 621         struct vattr va;
 622         int checkwriteperm;
 623         boolean_t dominant_label = B_FALSE;
 624         boolean_t equal_label = B_FALSE;
 625         boolean_t admin_low_client;
 626 
 627         vap = NULL;
 628 
 629         vp = nfs3_fhtovp(&args->object, exi);
 630 
 631         DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
 632             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 633             ACCESS3args *, args);
 634 
 635         if (vp == NULL) {
 636                 error = ESTALE;
 637                 goto out;
 638         }
 639 
 640         /*
 641          * If the file system is exported read only, it is not appropriate
 642          * to check write permissions for regular files and directories.
 643          * Special files are interpreted by the client, so the underlying
 644          * permissions are sent back to the client for interpretation.
 645          */
 646         if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
 647                 checkwriteperm = 0;
 648         else
 649                 checkwriteperm = 1;
 650 
 651         /*
 652          * We need the mode so that we can correctly determine access
 653          * permissions relative to a mandatory lock file.  Access to
 654          * mandatory lock files is denied on the server, so it might
 655          * as well be reflected to the server during the open.
 656          */
 657         va.va_mask = AT_MODE;
 658         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 659         if (error)
 660                 goto out;
 661 
 662         vap = &va;
 663 
 664         resp->resok.access = 0;
 665 
 666         if (is_system_labeled()) {
 667                 bslabel_t *clabel = req->rq_label;
 668 
 669                 ASSERT(clabel != NULL);
 670                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 671                     "got client label from request(1)", struct svc_req *, req);
 672 
 673                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 674                         if ((equal_label = do_rfs_label_check(clabel, vp,
 675                             EQUALITY_CHECK, exi)) == B_FALSE) {
 676                                 dominant_label = do_rfs_label_check(clabel,
 677                                     vp, DOMINANCE_CHECK, exi);
 678                         } else
 679                                 dominant_label = B_TRUE;
 680                         admin_low_client = B_FALSE;
 681                 } else
 682                         admin_low_client = B_TRUE;
 683         }
 684 
 685         if (args->access & ACCESS3_READ) {
 686                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 687                 if (error) {
 688                         if (curthread->t_flag & T_WOULDBLOCK)
 689                                 goto out;
 690                 } else if (!MANDLOCK(vp, va.va_mode) &&
 691                     (!is_system_labeled() || admin_low_client ||
 692                     dominant_label))
 693                         resp->resok.access |= ACCESS3_READ;
 694         }
 695         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 696                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 697                 if (error) {
 698                         if (curthread->t_flag & T_WOULDBLOCK)
 699                                 goto out;
 700                 } else if (!is_system_labeled() || admin_low_client ||
 701                     dominant_label)
 702                         resp->resok.access |= ACCESS3_LOOKUP;
 703         }
 704         if (checkwriteperm &&
 705             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 706                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 707                 if (error) {
 708                         if (curthread->t_flag & T_WOULDBLOCK)
 709                                 goto out;
 710                 } else if (!MANDLOCK(vp, va.va_mode) &&
 711                     (!is_system_labeled() || admin_low_client || equal_label)) {
 712                         resp->resok.access |=
 713                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 714                 }
 715         }
 716         if (checkwriteperm &&
 717             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 718                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 719                 if (error) {
 720                         if (curthread->t_flag & T_WOULDBLOCK)
 721                                 goto out;
 722                 } else if (!is_system_labeled() || admin_low_client ||
 723                     equal_label)
 724                         resp->resok.access |= ACCESS3_DELETE;
 725         }
 726         if (args->access & ACCESS3_EXECUTE) {
 727                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 728                 if (error) {
 729                         if (curthread->t_flag & T_WOULDBLOCK)
 730                                 goto out;
 731                 } else if (!MANDLOCK(vp, va.va_mode) &&
 732                     (!is_system_labeled() || admin_low_client ||
 733                     dominant_label))
 734                         resp->resok.access |= ACCESS3_EXECUTE;
 735         }
 736 
 737         va.va_mask = AT_ALL;
 738         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 739 
 740         resp->status = NFS3_OK;
 741         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 742 
 743         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 744             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 745             ACCESS3res *, resp);
 746 
 747         VN_RELE(vp);
 748 
 749         return;
 750 
 751 out:
 752         if (curthread->t_flag & T_WOULDBLOCK) {
 753                 curthread->t_flag &= ~T_WOULDBLOCK;
 754                 resp->status = NFS3ERR_JUKEBOX;
 755         } else
 756                 resp->status = puterrno3(error);
 757         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 758             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 759             ACCESS3res *, resp);
 760         if (vp != NULL)
 761                 VN_RELE(vp);
 762         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 763 }
 764 
 765 void *
 766 rfs3_access_getfh(ACCESS3args *args)
 767 {
 768 
 769         return (&args->object);
 770 }
 771 
 772 /* ARGSUSED */
 773 void
 774 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 775     struct svc_req *req, cred_t *cr, bool_t ro)
 776 {
 777         int error;
 778         vnode_t *vp;
 779         struct vattr *vap;
 780         struct vattr va;
 781         struct iovec iov;
 782         struct uio uio;
 783         char *data;
 784         struct sockaddr *ca;
 785         char *name = NULL;
 786         int is_referral = 0;
 787 
 788         vap = NULL;
 789 
 790         vp = nfs3_fhtovp(&args->symlink, exi);
 791 
 792         DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
 793             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 794             READLINK3args *, args);
 795 
 796         if (vp == NULL) {
 797                 error = ESTALE;
 798                 goto out;
 799         }
 800 
 801         va.va_mask = AT_ALL;
 802         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 803         if (error)
 804                 goto out;
 805 
 806         vap = &va;
 807 
 808         /* We lied about the object type for a referral */
 809         if (vn_is_nfs_reparse(vp, cr))
 810                 is_referral = 1;
 811 
 812         if (vp->v_type != VLNK && !is_referral) {
 813                 resp->status = NFS3ERR_INVAL;
 814                 goto out1;
 815         }
 816 
 817         if (MANDLOCK(vp, va.va_mode)) {
 818                 resp->status = NFS3ERR_ACCES;
 819                 goto out1;
 820         }
 821 
 822         if (is_system_labeled()) {
 823                 bslabel_t *clabel = req->rq_label;
 824 
 825                 ASSERT(clabel != NULL);
 826                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 827                     "got client label from request(1)", struct svc_req *, req);
 828 
 829                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 830                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 831                             exi)) {
 832                                 resp->status = NFS3ERR_ACCES;
 833                                 goto out1;
 834                         }
 835                 }
 836         }
 837 
 838         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 839 
 840         if (is_referral) {
 841                 char *s;
 842                 size_t strsz;
 843 
 844                 /* Get an artificial symlink based on a referral */
 845                 s = build_symlink(vp, cr, &strsz);
 846                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 847                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 848                     vnode_t *, vp, char *, s);
 849                 if (s == NULL)
 850                         error = EINVAL;
 851                 else {
 852                         error = 0;
 853                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 854                         kmem_free(s, strsz);
 855                 }
 856 
 857         } else {
 858 
 859                 iov.iov_base = data;
 860                 iov.iov_len = MAXPATHLEN;
 861                 uio.uio_iov = &iov;
 862                 uio.uio_iovcnt = 1;
 863                 uio.uio_segflg = UIO_SYSSPACE;
 864                 uio.uio_extflg = UIO_COPY_CACHED;
 865                 uio.uio_loffset = 0;
 866                 uio.uio_resid = MAXPATHLEN;
 867 
 868                 error = VOP_READLINK(vp, &uio, cr, NULL);
 869 
 870                 if (!error)
 871                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 872         }
 873 
 874         va.va_mask = AT_ALL;
 875         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 876 
 877         /* Lie about object type again just to be consistent */
 878         if (is_referral && vap != NULL)
 879                 vap->va_type = VLNK;
 880 
 881 #if 0 /* notyet */
 882         /*
 883          * Don't do this.  It causes local disk writes when just
 884          * reading the file and the overhead is deemed larger
 885          * than the benefit.
 886          */
 887         /*
 888          * Force modified metadata out to stable storage.
 889          */
 890         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 891 #endif
 892 
 893         if (error) {
 894                 kmem_free(data, MAXPATHLEN + 1);
 895                 goto out;
 896         }
 897 
 898         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 899         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 900             MAXPATHLEN + 1);
 901 
 902         if (name == NULL) {
 903                 /*
 904                  * Even though the conversion failed, we return
 905                  * something. We just don't translate it.
 906                  */
 907                 name = data;
 908         }
 909 
 910         resp->status = NFS3_OK;
 911         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 912         resp->resok.data = name;
 913 
 914         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 915             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 916             READLINK3res *, resp);
 917         VN_RELE(vp);
 918 
 919         if (name != data)
 920                 kmem_free(data, MAXPATHLEN + 1);
 921 
 922         return;
 923 
 924 out:
 925         if (curthread->t_flag & T_WOULDBLOCK) {
 926                 curthread->t_flag &= ~T_WOULDBLOCK;
 927                 resp->status = NFS3ERR_JUKEBOX;
 928         } else
 929                 resp->status = puterrno3(error);
 930 out1:
 931         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 932             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 933             READLINK3res *, resp);
 934         if (vp != NULL)
 935                 VN_RELE(vp);
 936         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 937 }
 938 
 939 void *
 940 rfs3_readlink_getfh(READLINK3args *args)
 941 {
             /* Expose the symlink member of the arguments as an untyped pointer. */
 942         void *fh = &args->symlink;
 943         return (fh);
 944 }
 945 
 946 void
 947 rfs3_readlink_free(READLINK3res *resp)
 948 {
             /*
              * Nothing is released unless the reply status is NFS3_OK; the
              * reply's data buffer is then freed as MAXPATHLEN + 1 bytes.
              */
 949         if (resp->status != NFS3_OK)
 950                 return;
 951         kmem_free(resp->resok.data, MAXPATHLEN + 1);
 952 }
 953 
 954 /*
 955  * Server routine to handle read
 956  * May handle RDMA data as well as mblks
      *
      * Translates args->file into a vnode, runs the label, non-blocking
      * mandatory lock, access and MANDLOCK checks, and then reads up to
      * args->count bytes (capped at rfs3_tsize(req)) starting at
      * args->offset.
      *
      *   args - READ3 arguments; a non-NULL args->wlist selects the RDMA
      *          path, and args->count may be lowered to the transfer cap
      *   resp - result; with NFS3_OK, resp->resok carries the post-op
      *          attributes, count, eof and the data (data.mp on the mblk
      *          paths, data.data_val plus the chunk list for RDMA);
      *          otherwise resp->resfail.file_attributes is filled from
      *          whatever attributes had been fetched (vap may be NULL)
      *   exi  - export used to resolve the file handle and for label checks
      *   req  - RPC request (client label, transfer size, RDMA chunk)
      *   cr   - credentials passed to every VOP call
      *   ro   - not referenced in this function
      *
      * Nothing is returned; the outcome is reported through resp->status.
      * NFS3ERR_JUKEBOX is used when T_WOULDBLOCK is set on the current
      * thread at the error exit, or when VOP_RWLOCK/VOP_READ fail with
      * EAGAIN while the caller context has CC_WOULDBLOCK set.
 957  */
 958 /* ARGSUSED */
 959 void
 960 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 961     struct svc_req *req, cred_t *cr, bool_t ro)
 962 {
 963         int error;
 964         vnode_t *vp;
 965         struct vattr *vap;
 966         struct vattr va;
 967         struct iovec iov, *iovp = NULL;
 968         int iovcnt;
 969         struct uio uio;
 970         u_offset_t offset;
 971         mblk_t *mp = NULL;
 972         int in_crit = 0;
 973         int need_rwunlock = 0;
 974         caller_context_t ct;
 975         int rdma_used = 0;
 976         int loaned_buffers;
 977         struct uio *uiop;
 978
             /* No attributes fetched yet; the failure exit passes vap as is. */
 979         vap = NULL;
 980
 981         vp = nfs3_fhtovp(&args->file, exi);
 982
 983         DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
 984             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 985             READ3args *, args);
 986
 987
 988         if (vp == NULL) {
 989                 error = ESTALE;
 990                 goto out;
 991         }
 992
             /*
              * A write chunk list in the arguments means the data goes back
              * via RDMA.  The request is rejected when it asks for more than
              * clist_len() reports for that list.
              */
 993         if (args->wlist) {
 994                 if (args->count > clist_len(args->wlist)) {
 995                         error = EINVAL;
 996                         goto out;
 997                 }
 998                 rdma_used = 1;
 999         }
1000
1001         /* use loaned buffers for TCP */
1002         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1003
1004         if (is_system_labeled()) {
1005                 bslabel_t *clabel = req->rq_label;
1006
1007                 ASSERT(clabel != NULL);
1008                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1009                     "got client label from request(1)", struct svc_req *, req);
1010
                     /* Clients labeled admin_low skip the dominance check. */
1011                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1012                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1013                             exi)) {
1014                                 resp->status = NFS3ERR_ACCES;
1015                                 goto out1;
1016                         }
1017                 }
1018         }
1019
             /*
              * Caller context handed to the VOP calls below.  CC_DONTBLOCK
              * presumably asks them not to block; CC_WOULDBLOCK is what this
              * function tests afterwards to detect a delegation conflict.
              */
1020         ct.cc_sysid = 0;
1021         ct.cc_pid = 0;
1022         ct.cc_caller_id = nfs3_srv_caller_id;
1023         ct.cc_flags = CC_DONTBLOCK;
1024
1025         /*
1026          * Enter the critical region before calling VOP_RWLOCK
1027          * to avoid a deadlock with write requests.
1028          */
1029         if (nbl_need_check(vp)) {
1030                 nbl_start_crit(vp, RW_READER);
1031                 in_crit = 1;
1032                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1033                     NULL)) {
1034                         error = EACCES;
1035                         goto out;
1036                 }
1037         }
1038
1039         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1040
1041         /* check if a monitor detected a delegation conflict */
1042         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1043                 resp->status = NFS3ERR_JUKEBOX;
1044                 goto out1;
1045         }
1046
             /* From here on the failure exit (out1) drops the rwlock. */
1047         need_rwunlock = 1;
1048
1049         va.va_mask = AT_ALL;
1050         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1051
1052         /*
1053          * If we can't get the attributes, then we can't do the
1054          * right access checking.  So, we'll fail the request.
1055          */
1056         if (error)
1057                 goto out;
1058
1059         vap = &va;
1060
1061         if (vp->v_type != VREG) {
1062                 resp->status = NFS3ERR_INVAL;
1063                 goto out1;
1064         }
1065
             /*
              * The owner of the file is not subjected to VOP_ACCESS.  Anyone
              * else needs VREAD or, failing that, VEXEC.  If the VREAD check
              * failed with T_WOULDBLOCK set, the VEXEC retry is skipped and
              * the out label turns the failure into NFS3ERR_JUKEBOX.
              */
1066         if (crgetuid(cr) != va.va_uid) {
1067                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1068                 if (error) {
1069                         if (curthread->t_flag & T_WOULDBLOCK)
1070                                 goto out;
1071                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1072                         if (error)
1073                                 goto out;
1074                 }
1075         }
1076
1077         if (MANDLOCK(vp, va.va_mode)) {
1078                 resp->status = NFS3ERR_ACCES;
1079                 goto out1;
1080         }
1081
             /*
              * Read starting at or beyond the file size: reply NFS3_OK with
              * no data and eof set.  The rwlock and critical region are
              * released here because the done label does not release them.
              */
1082         offset = args->offset;
1083         if (offset >= va.va_size) {
1084                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1085                 if (in_crit)
1086                         nbl_end_crit(vp);
1087                 resp->status = NFS3_OK;
1088                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1089                 resp->resok.count = 0;
1090                 resp->resok.eof = TRUE;
1091                 resp->resok.data.data_len = 0;
1092                 resp->resok.data.data_val = NULL;
1093                 resp->resok.data.mp = NULL;
1094                 /* RDMA */
1095                 resp->resok.wlist = args->wlist;
1096                 resp->resok.wlist_len = resp->resok.count;
1097                 if (resp->resok.wlist)
1098                         clist_zero_len(resp->resok.wlist);
1099                 goto done;
1100         }
1101
             /*
              * Zero-length read inside the file: the same empty reply as
              * above, except that eof is FALSE.
              */
1102         if (args->count == 0) {
1103                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1104                 if (in_crit)
1105                         nbl_end_crit(vp);
1106                 resp->status = NFS3_OK;
1107                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1108                 resp->resok.count = 0;
1109                 resp->resok.eof = FALSE;
1110                 resp->resok.data.data_len = 0;
1111                 resp->resok.data.data_val = NULL;
1112                 resp->resok.data.mp = NULL;
1113                 /* RDMA */
1114                 resp->resok.wlist = args->wlist;
1115                 resp->resok.wlist_len = resp->resok.count;
1116                 if (resp->resok.wlist)
1117                         clist_zero_len(resp->resok.wlist);
1118                 goto done;
1119         }
1120
1121         /*
1122          * Do not allocate more memory than the maximum allowed
1123          * transfer size.
1124          */
1125         if (args->count > rfs3_tsize(req))
1126                 args->count = rfs3_tsize(req);
1127
             /*
              * Zero-copy attempt: set up an xuio and ask the file system for
              * loaned buffers.  If VOP_REQZCBUF fails, the xuio is freed and
              * the copying path below is used instead.
              */
1128         if (loaned_buffers) {
1129                 uiop = (uio_t *)rfs_setup_xuio(vp);
1130                 ASSERT(uiop != NULL);
1131                 uiop->uio_segflg = UIO_SYSSPACE;
1132                 uiop->uio_loffset = args->offset;
1133                 uiop->uio_resid = args->count;
1134
1135                 /* Jump to do the read if successful */
1136                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1137                         /*
1138                          * Need to hold the vnode until after VOP_RETZCBUF()
1139                          * is called.
1140                          */
1141                         VN_HOLD(vp);
1142                         goto doio_read;
1143                 }
1144
1145                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1146                     uiop->uio_loffset, int, uiop->uio_resid);
1147
1148                 uiop->uio_extflg = 0;
1149                 /* failure to setup for zero copy */
1150                 rfs_free_xuio((void *)uiop);
1151                 loaned_buffers = 0;
1152         }
1153
1154         /*
1155          * If returning data via RDMA Write, then grab the chunk list.
1156          * If we aren't returning READ data w/RDMA_WRITE, then grab
1157          * a mblk.
1158          */
1159         if (rdma_used) {
1160                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1161                 uio.uio_iov = &iov;
1162                 uio.uio_iovcnt = 1;
1163         } else {
1164                 /*
1165                  * mp will contain the data to be sent out in the read reply.
1166                  * For UDP, this will be freed after the reply has been sent
1167                  * out by the driver.  For TCP, it will be freed after the last
1168                  * segment associated with the reply has been ACKed by the
1169                  * client.
1170                  */
1171                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1172                 uio.uio_iov = iovp;
1173                 uio.uio_iovcnt = iovcnt;
1174         }
1175
1176         uio.uio_segflg = UIO_SYSSPACE;
1177         uio.uio_extflg = UIO_COPY_CACHED;
1178         uio.uio_loffset = args->offset;
1179         uio.uio_resid = args->count;
1180         uiop = &uio;
1181
     /* uiop is the xuio from the loaned-buffer path, or &uio otherwise. */
1182 doio_read:
1183         error = VOP_READ(vp, uiop, 0, cr, &ct);
1184
             /*
              * NOTE(review): when this failure is reached from the
              * loaned-buffer path, mp is still NULL and neither the xuio nor
              * the extra VN_HOLD taken above is released in this function.
              * Confirm whether that cleanup happens elsewhere.
              */
1185         if (error) {
1186                 if (mp)
1187                         freemsg(mp);
1188                 /* check if a monitor detected a delegation conflict */
1189                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1190                         resp->status = NFS3ERR_JUKEBOX;
1191                         goto out1;
1192                 }
1193                 goto out;
1194         }
1195
1196         /* make mblk using zc buffers */
1197         if (loaned_buffers) {
1198                 mp = uio_to_mblk(uiop);
1199                 ASSERT(mp != NULL);
1200         }
1201
             /*
              * Refresh the attributes for the reply.  A failure here does not
              * fail the read; the reply is just built with vap == NULL.
              */
1202         va.va_mask = AT_ALL;
1203         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1204
1205         if (error)
1206                 vap = NULL;
1207         else
1208                 vap = &va;
1209
1210         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1211
1212         if (in_crit)
1213                 nbl_end_crit(vp);
1214
             /*
              * count is what was actually transferred.  eof is reported only
              * when the post-read getattr succeeded and the read ended exactly
              * at the file size it returned.
              */
1215         resp->status = NFS3_OK;
1216         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1217         resp->resok.count = args->count - uiop->uio_resid;
1218         if (!error && offset + resp->resok.count == va.va_size)
1219                 resp->resok.eof = TRUE;
1220         else
1221                 resp->resok.eof = FALSE;
1222         resp->resok.data.data_len = resp->resok.count;
1223
1224         if (mp)
1225                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1226
1227         resp->resok.data.mp = mp;
1228         resp->resok.size = (uint_t)args->count;
1229
             /*
              * RDMA replies point data_val at the write chunk buffer and turn
              * into NFS3ERR_INVAL if rdma_setup_read_data3() fails.  On the
              * mblk paths data_val points at the first mblk's data base.
              */
1230         if (rdma_used) {
1231                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1232                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1233                         resp->status = NFS3ERR_INVAL;
1234                 }
1235         } else {
1236                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1237                 (resp->resok).wlist = NULL;
1238         }
1239
     /* Success exit: the rwlock and critical region are already released. */
1240 done:
1241         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1242             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1243             READ3res *, resp);
1244
1245         VN_RELE(vp);
1246
             /* iovp is only non-NULL when rfs_read_alloc() filled it in. */
1247         if (iovp != NULL)
1248                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1249
1250         return;
1251
     /*
      * Failure exit for errno-style errors: a pending T_WOULDBLOCK on the
      * thread is cleared and reported as JUKEBOX, anything else is mapped
      * with puterrno3().
      */
1252 out:
1253         if (curthread->t_flag & T_WOULDBLOCK) {
1254                 curthread->t_flag &= ~T_WOULDBLOCK;
1255                 resp->status = NFS3ERR_JUKEBOX;
1256         } else
1257                 resp->status = puterrno3(error);
     /* Failure exit with resp->status already chosen by the caller. */
1258 out1:
1259         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1260             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1261             READ3res *, resp);
1262
1263         if (vp != NULL) {
1264                 if (need_rwunlock)
1265                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1266                 if (in_crit)
1267                         nbl_end_crit(vp);
1268                 VN_RELE(vp);
1269         }
1270         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1271
1272         if (iovp != NULL)
1273                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1274 }
1275 
1276 void
1277 rfs3_read_free(READ3res *resp)
1278 {
1279         /* Only a reply with NFS3_OK status is examined for data. */
1280         if (resp->status != NFS3_OK)
1281                 return;
1282
1283         /* Release the mblk chain attached to the reply, if there is one. */
1284         if (resp->resok.data.mp != NULL)
1285                 freemsg(resp->resok.data.mp);
1286 }
1287 
1288 void *
1289 rfs3_read_getfh(READ3args *args)
1290 {
             /* Expose the file member of the arguments as an untyped pointer. */
1291         void *fh = &args->file;
1292         return (fh);
1293 }
1294 
/*
 * MAX_IOVECS is the size of the on-stack iovec array in rfs3_write().  An
 * mblk chain with more blocks than this gets a kmem_alloc()ed array
 * instead.  When DEBUG is defined, rfs3_write_hits counts the requests
 * whose chain fit the stack array and rfs3_write_misses those that did not.
 */
1295 #define MAX_IOVECS      12
1296
1297 #ifdef DEBUG
1298 static int rfs3_write_hits = 0;
1299 static int rfs3_write_misses = 0;
1300 #endif
1301 
/*
 * Server routine to handle write.
 *
 * Translates args->file into a vnode, takes the rwlock for writing,
 * validates the request and writes args->count bytes at args->offset.
 * The data comes from the request's mblk chain (args->mblk), from an RDMA
 * read chunk (args->rlist) or from the inline buffer (args->data.data_val).
 *
 *   args - WRITE3 arguments (file handle, offset, count, stable, data)
 *   resp - result; with NFS3_OK, resp->resok carries the wcc data, the
 *          byte count written, the commit level and the write verifier;
 *          otherwise resp->resfail.file_wcc is filled in
 *   exi  - export used to resolve the file handle and for label checks
 *   req  - RPC request (client label)
 *   cr   - credentials passed to every VOP call
 *   ro   - passed to rdonly() to decide whether writing is refused
 *
 * Nothing is returned; the outcome is reported through resp->status.
 */
1302 void
1303 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1304     struct svc_req *req, cred_t *cr, bool_t ro)
1305 {
1306         nfs3_srv_t *ns;
1307         int error;
1308         vnode_t *vp;
1309         struct vattr *bvap = NULL;
1310         struct vattr bva;
1311         struct vattr *avap = NULL;
1312         struct vattr ava;
1313         u_offset_t rlimit;
1314         struct uio uio;
1315         struct iovec iov[MAX_IOVECS];
1316         mblk_t *m;
1317         struct iovec *iovp;
1318         int iovcnt;
1319         int ioflag;
1320         cred_t *savecred;
1321         int in_crit = 0;
             /* -1 means VOP_RWUNLOCK must not be called at the out label. */
1322         int rwlock_ret = -1;
1323         caller_context_t ct;
1324
1325         vp = nfs3_fhtovp(&args->file, exi);
1326
1327         DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1328             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1329             WRITE3args *, args);
1330
1331         if (vp == NULL) {
1332                 error = ESTALE;
1333                 goto err;
1334         }
1335
             /* ns supplies the write verifier placed in successful replies. */
1336         ns = nfs3_get_srv();
1337         if (is_system_labeled()) {
1338                 bslabel_t *clabel = req->rq_label;
1339
1340                 ASSERT(clabel != NULL);
1341                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1342                     "got client label from request(1)", struct svc_req *, req);
1343
                     /* Clients labeled admin_low skip the equality check. */
1344                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1345                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1346                             exi)) {
1347                                 resp->status = NFS3ERR_ACCES;
1348                                 goto err1;
1349                         }
1350                 }
1351         }
1352
             /*
              * Caller context handed to the VOP calls below.  CC_DONTBLOCK
              * presumably asks them not to block; CC_WOULDBLOCK is what this
              * function tests afterwards to detect a delegation conflict.
              */
1353         ct.cc_sysid = 0;
1354         ct.cc_pid = 0;
1355         ct.cc_caller_id = nfs3_srv_caller_id;
1356         ct.cc_flags = CC_DONTBLOCK;
1357
1358         /*
1359          * We have to enter the critical region before calling VOP_RWLOCK
1360          * to avoid a deadlock with ufs.
1361          */
1362         if (nbl_need_check(vp)) {
1363                 nbl_start_crit(vp, RW_READER);
1364                 in_crit = 1;
1365                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1366                     NULL)) {
1367                         error = EACCES;
1368                         goto err;
1369                 }
1370         }
1371
1372         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1373
1374         /* check if a monitor detected a delegation conflict */
1375         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1376                 resp->status = NFS3ERR_JUKEBOX;
                     /* Reset so that the out label skips VOP_RWUNLOCK. */
1377                 rwlock_ret = -1;
1378                 goto err1;
1379         }
1380
1381
1382         bva.va_mask = AT_ALL;
1383         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1384
1385         /*
1386          * If we can't get the attributes, then we can't do the
1387          * right access checking.  So, we'll fail the request.
1388          */
1389         if (error)
1390                 goto err;
1391
             /*
              * Until the write has happened the "after" attributes are the
              * same as the "before" ones; early failures report both.
              */
1392         bvap = &bva;
1393         avap = bvap;
1394
             /* The declared count must match the length of the data sent. */
1395         if (args->count != args->data.data_len) {
1396                 resp->status = NFS3ERR_INVAL;
1397                 goto err1;
1398         }
1399
1400         if (rdonly(ro, vp)) {
1401                 resp->status = NFS3ERR_ROFS;
1402                 goto err1;
1403         }
1404
1405         if (vp->v_type != VREG) {
1406                 resp->status = NFS3ERR_INVAL;
1407                 goto err1;
1408         }
1409
             /* The owner of the file is not subjected to VOP_ACCESS. */
1410         if (crgetuid(cr) != bva.va_uid &&
1411             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1412                 goto err;
1413
1414         if (MANDLOCK(vp, bva.va_mode)) {
1415                 resp->status = NFS3ERR_ACCES;
1416                 goto err1;
1417         }
1418
             /* Zero-length write: succeed without calling VOP_WRITE. */
1419         if (args->count == 0) {
1420                 resp->status = NFS3_OK;
1421                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1422                 resp->resok.count = 0;
1423                 resp->resok.committed = args->stable;
1424                 resp->resok.verf = ns->write3verf;
1425                 goto out;
1426         }
1427
             /*
              * Build the iovec array describing the data.  An mblk chain gets
              * one iovec per mblk, using the on-stack array when the chain has
              * at most MAX_IOVECS blocks and a kmem_alloc()ed array otherwise.
              * The RDMA chunk and inline buffer cases need a single iovec.
              */
1428         if (args->mblk != NULL) {
1429                 iovcnt = 0;
1430                 for (m = args->mblk; m != NULL; m = m->b_cont)
1431                         iovcnt++;
1432                 if (iovcnt <= MAX_IOVECS) {
1433 #ifdef DEBUG
1434                         rfs3_write_hits++;
1435 #endif
1436                         iovp = iov;
1437                 } else {
1438 #ifdef DEBUG
1439                         rfs3_write_misses++;
1440 #endif
1441                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1442                 }
1443                 mblk_to_iov(args->mblk, iovcnt, iovp);
1444
1445         } else if (args->rlist != NULL) {
1446                 iovcnt = 1;
1447                 iovp = iov;
1448                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1449                 iovp->iov_len = args->count;
1450         } else {
1451                 iovcnt = 1;
1452                 iovp = iov;
1453                 iovp->iov_base = args->data.data_val;
1454                 iovp->iov_len = args->count;
1455         }
1456
1457         uio.uio_iov = iovp;
1458         uio.uio_iovcnt = iovcnt;
1459
             /*
              * Trim the transfer so that it does not run past the file size
              * limit taken from curproc->p_fsz_ctl.
              * NOTE(review): this assumes args->offset does not exceed the
              * limit; otherwise the subtraction presumably wraps (u_offset_t
              * looks unsigned) and no trimming happens - confirm.
              */
1460         uio.uio_segflg = UIO_SYSSPACE;
1461         uio.uio_extflg = UIO_COPY_DEFAULT;
1462         uio.uio_loffset = args->offset;
1463         uio.uio_resid = args->count;
1464         uio.uio_llimit = curproc->p_fsz_ctl;
1465         rlimit = uio.uio_llimit - args->offset;
1466         if (rlimit < (u_offset_t)uio.uio_resid)
1467                 uio.uio_resid = (int)rlimit;
1468
             /*
              * Map the requested stability level to the VOP_WRITE I/O flag.
              * An unknown level fails the request after releasing a
              * heap-allocated iovec array.
              */
1469         if (args->stable == UNSTABLE)
1470                 ioflag = 0;
1471         else if (args->stable == FILE_SYNC)
1472                 ioflag = FSYNC;
1473         else if (args->stable == DATA_SYNC)
1474                 ioflag = FDSYNC;
1475         else {
1476                 if (iovp != iov)
1477                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1478                 resp->status = NFS3ERR_INVAL;
1479                 goto err1;
1480         }
1481
1482         /*
1483          * We're changing creds because VM may fault and we need
1484          * the cred of the current thread to be used if quota
1485          * checking is enabled.
1486          */
1487         savecred = curthread->t_cred;
1488         curthread->t_cred = cr;
1489         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1490         curthread->t_cred = savecred;
1491
1492         if (iovp != iov)
1493                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1494
1495         /* check if a monitor detected a delegation conflict */
1496         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1497                 resp->status = NFS3ERR_JUKEBOX;
1498                 goto err1;
1499         }
1500
             /*
              * Fetch the "after" attributes before looking at the write
              * error, so that a failure reply can carry them as well.
              */
1501         ava.va_mask = AT_ALL;
1502         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1503
1504         if (error)
1505                 goto err;
1506
1507         /*
1508          * If we were unable to get the V_WRITELOCK_TRUE, then we
1509          * may not have accurate after attrs, so check if
1510          * we have both attributes, they have a non-zero va_seq, and
1511          * va_seq has changed by exactly one,
1512          * if not, turn off the before attr.
1513          */
1514         if (rwlock_ret != V_WRITELOCK_TRUE) {
1515                 if (bvap == NULL || avap == NULL ||
1516                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1517                     avap->va_seq != (bvap->va_seq + 1)) {
1518                         bvap = NULL;
1519                 }
1520         }
1521
             /* count is the number of bytes VOP_WRITE actually consumed. */
1522         resp->status = NFS3_OK;
1523         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1524         resp->resok.count = args->count - uio.uio_resid;
1525         resp->resok.committed = args->stable;
1526         resp->resok.verf = ns->write3verf;
1527         goto out;
1528
     /*
      * Failure exit for errno-style errors: a pending T_WOULDBLOCK on the
      * thread is cleared and reported as JUKEBOX, anything else is mapped
      * with puterrno3().
      */
1529 err:
1530         if (curthread->t_flag & T_WOULDBLOCK) {
1531                 curthread->t_flag &= ~T_WOULDBLOCK;
1532                 resp->status = NFS3ERR_JUKEBOX;
1533         } else
1534                 resp->status = puterrno3(error);
     /* Failure exit with resp->status already chosen by the caller. */
1535 err1:
1536         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
     /* Common exit: fire the done probe and release lock, region and hold. */
1537 out:
1538         DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1539             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1540             WRITE3res *, resp);
1541
1542         if (vp != NULL) {
1543                 if (rwlock_ret != -1)
1544                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1545                 if (in_crit)
1546                         nbl_end_crit(vp);
1547                 VN_RELE(vp);
1548         }
1549 }
1550 
1551 void *
1552 rfs3_write_getfh(WRITE3args *args)
1553 {
             /* Expose the file member of the arguments as an untyped pointer. */
1554         void *fh = &args->file;
1555         return (fh);
1556 }
1557 
1558 void
1559 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1560     struct svc_req *req, cred_t *cr, bool_t ro)
1561 {
1562         int error;
1563         int in_crit = 0;
1564         vnode_t *vp;
1565         vnode_t *tvp = NULL;
1566         vnode_t *dvp;
1567         struct vattr *vap;
1568         struct vattr va;
1569         struct vattr *dbvap;
1570         struct vattr dbva;
1571         struct vattr *davap;
1572         struct vattr dava;
1573         enum vcexcl excl;
1574         nfstime3 *mtime;
1575         len_t reqsize;
1576         bool_t trunc;
1577         struct sockaddr *ca;
1578         char *name = NULL;
1579 
1580         dbvap = NULL;
1581         davap = NULL;
1582 
1583         dvp = nfs3_fhtovp(&args->where.dir, exi);
1584 
1585         DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1586             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1587             CREATE3args *, args);
1588 
1589         if (dvp == NULL) {
1590                 error = ESTALE;
1591                 goto out;
1592         }
1593 
1594         dbva.va_mask = AT_ALL;
1595         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1596         davap = dbvap;
1597 
1598         if (args->where.name == nfs3nametoolong) {
1599                 resp->status = NFS3ERR_NAMETOOLONG;
1600                 goto out1;
1601         }
1602 
1603         if (args->where.name == NULL || *(args->where.name) == '\0') {
1604                 resp->status = NFS3ERR_ACCES;
1605                 goto out1;
1606         }
1607 
1608         if (rdonly(ro, dvp)) {
1609                 resp->status = NFS3ERR_ROFS;
1610                 goto out1;
1611         }
1612 
1613         if (is_system_labeled()) {
1614                 bslabel_t *clabel = req->rq_label;
1615 
1616                 ASSERT(clabel != NULL);
1617                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1618                     "got client label from request(1)", struct svc_req *, req);
1619 
1620                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1621                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1622                             exi)) {
1623                                 resp->status = NFS3ERR_ACCES;
1624                                 goto out1;
1625                         }
1626                 }
1627         }
1628 
1629         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1630         name = nfscmd_convname(ca, exi, args->where.name,
1631             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1632 
1633         if (name == NULL) {
1634                 /* This is really a Solaris EILSEQ */
1635                 resp->status = NFS3ERR_INVAL;
1636                 goto out1;
1637         }
1638 
1639         if (args->how.mode == EXCLUSIVE) {
1640                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1641                 va.va_type = VREG;
1642                 va.va_mode = (mode_t)0;
1643                 /*
1644                  * Ensure no time overflows and that types match
1645                  */
1646                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1647                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1648                 va.va_mtime.tv_nsec = mtime->nseconds;
1649                 excl = EXCL;
1650         } else {
1651                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1652                     &va);
1653                 if (error)
1654                         goto out;
1655                 va.va_mask |= AT_TYPE;
1656                 va.va_type = VREG;
1657                 if (args->how.mode == GUARDED)
1658                         excl = EXCL;
1659                 else {
1660                         excl = NONEXCL;
1661 
1662                         /*
1663                          * During creation of file in non-exclusive mode
1664                          * if size of file is being set then make sure
1665                          * that if the file already exists that no conflicting
1666                          * non-blocking mandatory locks exists in the region
1667                          * being modified. If there are conflicting locks fail
1668                          * the operation with EACCES.
1669                          */
1670                         if (va.va_mask & AT_SIZE) {
1671                                 struct vattr tva;
1672 
1673                                 /*
1674                                  * Does file already exist?
1675                                  */
1676                                 error = VOP_LOOKUP(dvp, name, &tvp,
1677                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1678 
1679                                 /*
1680                                  * Check to see if the file has been delegated
1681                                  * to a v4 client.  If so, then begin recall of
1682                                  * the delegation and return JUKEBOX to allow
1683                                  * the client to retransmit its request.
1684                                  */
1685 
1686                                 trunc = va.va_size == 0;
1687                                 if (!error &&
1688                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1689                                         resp->status = NFS3ERR_JUKEBOX;
1690                                         goto out1;
1691                                 }
1692 
1693                                 /*
1694                                  * Check for NBMAND lock conflicts
1695                                  */
1696                                 if (!error && nbl_need_check(tvp)) {
1697                                         u_offset_t offset;
1698                                         ssize_t len;
1699 
1700                                         nbl_start_crit(tvp, RW_READER);
1701                                         in_crit = 1;
1702 
1703                                         tva.va_mask = AT_SIZE;
1704                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1705                                             NULL);
1706                                         /*
1707                                          * Can't check for conflicts, so return
1708                                          * error.
1709                                          */
1710                                         if (error)
1711                                                 goto out;
1712 
1713                                         offset = tva.va_size < va.va_size ?
1714                                             tva.va_size : va.va_size;
1715                                         len = tva.va_size < va.va_size ?
1716                                             va.va_size - tva.va_size :
1717                                             tva.va_size - va.va_size;
1718                                         if (nbl_conflict(tvp, NBL_WRITE,
1719                                             offset, len, 0, NULL)) {
1720                                                 error = EACCES;
1721                                                 goto out;
1722                                         }
1723                                 } else if (tvp) {
1724                                         VN_RELE(tvp);
1725                                         tvp = NULL;
1726                                 }
1727                         }
1728                 }
1729                 if (va.va_mask & AT_SIZE)
1730                         reqsize = va.va_size;
1731         }
1732 
1733         /*
1734          * Must specify the mode.
1735          */
1736         if (!(va.va_mask & AT_MODE)) {
1737                 resp->status = NFS3ERR_INVAL;
1738                 goto out1;
1739         }
1740 
1741         /*
1742          * If the filesystem is exported with nosuid, then mask off
1743          * the setuid and setgid bits.
1744          */
1745         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1746                 va.va_mode &= ~(VSUID | VSGID);
1747 
1748 tryagain:
1749         /*
1750          * The file open mode used is VWRITE.  If the client needs
1751          * some other semantic, then it should do the access checking
1752          * itself.  It would have been nice to have the file open mode
1753          * passed as part of the arguments.
1754          */
1755         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1756             &vp, cr, 0, NULL, NULL);
1757 
1758         dava.va_mask = AT_ALL;
1759         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1760 
1761         if (error) {
1762                 /*
1763                  * If we got something other than file already exists
1764                  * then just return this error.  Otherwise, we got
1765                  * EEXIST.  If we were doing a GUARDED create, then
1766                  * just return this error.  Otherwise, we need to
1767                  * make sure that this wasn't a duplicate of an
1768                  * exclusive create request.
1769                  *
1770                  * The assumption is made that a non-exclusive create
1771                  * request will never return EEXIST.
1772                  */
1773                 if (error != EEXIST || args->how.mode == GUARDED)
1774                         goto out;
1775                 /*
1776                  * Lookup the file so that we can get a vnode for it.
1777                  */
1778                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1779                     NULL, cr, NULL, NULL, NULL);
1780                 if (error) {
1781                         /*
1782                          * We couldn't find the file that we thought that
1783                          * we just created.  So, we'll just try creating
1784                          * it again.
1785                          */
1786                         if (error == ENOENT)
1787                                 goto tryagain;
1788                         goto out;
1789                 }
1790 
1791                 /*
1792                  * If the file is delegated to a v4 client, go ahead
1793                  * and initiate recall, this create is a hint that a
1794                  * conflicting v3 open has occurred.
1795                  */
1796 
1797                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1798                         VN_RELE(vp);
1799                         resp->status = NFS3ERR_JUKEBOX;
1800                         goto out1;
1801                 }
1802 
1803                 va.va_mask = AT_ALL;
1804                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1805 
1806                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1807                 /* % with INT32_MAX to prevent overflows */
1808                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1809                     vap->va_mtime.tv_sec !=
1810                     (mtime->seconds % INT32_MAX) ||
1811                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1812                         VN_RELE(vp);
1813                         error = EEXIST;
1814                         goto out;
1815                 }
1816         } else {
1817 
1818                 if ((args->how.mode == UNCHECKED ||
1819                     args->how.mode == GUARDED) &&
1820                     args->how.createhow3_u.obj_attributes.size.set_it &&
1821                     va.va_size == 0)
1822                         trunc = TRUE;
1823                 else
1824                         trunc = FALSE;
1825 
1826                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1827                         VN_RELE(vp);
1828                         resp->status = NFS3ERR_JUKEBOX;
1829                         goto out1;
1830                 }
1831 
1832                 va.va_mask = AT_ALL;
1833                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1834 
1835                 /*
1836                  * We need to check to make sure that the file got
1837                  * created to the indicated size.  If not, we do a
1838                  * setattr to try to change the size, but we don't
1839                  * try too hard.  This shouldn't be a problem as most
1840                  * clients will only specify a size of zero which
1841                  * local file systems handle.  However, even if
1842                  * the client does specify a non-zero size, it can
1843                  * still recover by checking the size of the file
1844                  * after it has created it and then issue a setattr
1845                  * request of its own to set the size of the file.
1846                  */
1847                 if (vap != NULL &&
1848                     (args->how.mode == UNCHECKED ||
1849                     args->how.mode == GUARDED) &&
1850                     args->how.createhow3_u.obj_attributes.size.set_it &&
1851                     vap->va_size != reqsize) {
1852                         va.va_mask = AT_SIZE;
1853                         va.va_size = reqsize;
1854                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1855                         va.va_mask = AT_ALL;
1856                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1857                 }
1858         }
1859 
1860         if (name != args->where.name)
1861                 kmem_free(name, MAXPATHLEN + 1);
1862 
1863         error = makefh3(&resp->resok.obj.handle, vp, exi);
1864         if (error)
1865                 resp->resok.obj.handle_follows = FALSE;
1866         else
1867                 resp->resok.obj.handle_follows = TRUE;
1868 
1869         /*
1870          * Force modified data and metadata out to stable storage.
1871          */
1872         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1873         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1874 
1875         VN_RELE(vp);
1876         if (tvp != NULL) {
1877                 if (in_crit)
1878                         nbl_end_crit(tvp);
1879                 VN_RELE(tvp);
1880         }
1881 
1882         resp->status = NFS3_OK;
1883         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1884         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1885 
1886         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1887             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1888             CREATE3res *, resp);
1889 
1890         VN_RELE(dvp);
1891         return;
1892 
1893 out:
1894         if (curthread->t_flag & T_WOULDBLOCK) {
1895                 curthread->t_flag &= ~T_WOULDBLOCK;
1896                 resp->status = NFS3ERR_JUKEBOX;
1897         } else
1898                 resp->status = puterrno3(error);
1899 out1:
1900         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1901             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1902             CREATE3res *, resp);
1903 
1904         if (name != NULL && name != args->where.name)
1905                 kmem_free(name, MAXPATHLEN + 1);
1906 
1907         if (tvp != NULL) {
1908                 if (in_crit)
1909                         nbl_end_crit(tvp);
1910                 VN_RELE(tvp);
1911         }
1912         if (dvp != NULL)
1913                 VN_RELE(dvp);
1914         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1915 }
1916 
1917 void *
1918 rfs3_create_getfh(CREATE3args *args)
1919 {
1920 
1921         return (&args->where.dir);
1922 }
1923 
1924 void
1925 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1926     struct svc_req *req, cred_t *cr, bool_t ro)
1927 {
1928         int error;
1929         vnode_t *vp = NULL;
1930         vnode_t *dvp;
1931         struct vattr *vap;
1932         struct vattr va;
1933         struct vattr *dbvap;
1934         struct vattr dbva;
1935         struct vattr *davap;
1936         struct vattr dava;
1937         struct sockaddr *ca;
1938         char *name = NULL;
1939 
1940         dbvap = NULL;
1941         davap = NULL;
1942 
1943         dvp = nfs3_fhtovp(&args->where.dir, exi);
1944 
1945         DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1946             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1947             MKDIR3args *, args);
1948 
1949         if (dvp == NULL) {
1950                 error = ESTALE;
1951                 goto out;
1952         }
1953 
1954         dbva.va_mask = AT_ALL;
1955         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1956         davap = dbvap;
1957 
1958         if (args->where.name == nfs3nametoolong) {
1959                 resp->status = NFS3ERR_NAMETOOLONG;
1960                 goto out1;
1961         }
1962 
1963         if (args->where.name == NULL || *(args->where.name) == '\0') {
1964                 resp->status = NFS3ERR_ACCES;
1965                 goto out1;
1966         }
1967 
1968         if (rdonly(ro, dvp)) {
1969                 resp->status = NFS3ERR_ROFS;
1970                 goto out1;
1971         }
1972 
1973         if (is_system_labeled()) {
1974                 bslabel_t *clabel = req->rq_label;
1975 
1976                 ASSERT(clabel != NULL);
1977                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1978                     "got client label from request(1)", struct svc_req *, req);
1979 
1980                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1981                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1982                             exi)) {
1983                                 resp->status = NFS3ERR_ACCES;
1984                                 goto out1;
1985                         }
1986                 }
1987         }
1988 
1989         error = sattr3_to_vattr(&args->attributes, &va);
1990         if (error)
1991                 goto out;
1992 
1993         if (!(va.va_mask & AT_MODE)) {
1994                 resp->status = NFS3ERR_INVAL;
1995                 goto out1;
1996         }
1997 
1998         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1999         name = nfscmd_convname(ca, exi, args->where.name,
2000             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2001 
2002         if (name == NULL) {
2003                 resp->status = NFS3ERR_INVAL;
2004                 goto out1;
2005         }
2006 
2007         va.va_mask |= AT_TYPE;
2008         va.va_type = VDIR;
2009 
2010         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2011 
2012         if (name != args->where.name)
2013                 kmem_free(name, MAXPATHLEN + 1);
2014 
2015         dava.va_mask = AT_ALL;
2016         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2017 
2018         /*
2019          * Force modified data and metadata out to stable storage.
2020          */
2021         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2022 
2023         if (error)
2024                 goto out;
2025 
2026         error = makefh3(&resp->resok.obj.handle, vp, exi);
2027         if (error)
2028                 resp->resok.obj.handle_follows = FALSE;
2029         else
2030                 resp->resok.obj.handle_follows = TRUE;
2031 
2032         va.va_mask = AT_ALL;
2033         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2034 
2035         /*
2036          * Force modified data and metadata out to stable storage.
2037          */
2038         (void) VOP_FSYNC(vp, 0, cr, NULL);
2039 
2040         VN_RELE(vp);
2041 
2042         resp->status = NFS3_OK;
2043         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2044         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2045 
2046         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2047             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2048             MKDIR3res *, resp);
2049         VN_RELE(dvp);
2050 
2051         return;
2052 
2053 out:
2054         if (curthread->t_flag & T_WOULDBLOCK) {
2055                 curthread->t_flag &= ~T_WOULDBLOCK;
2056                 resp->status = NFS3ERR_JUKEBOX;
2057         } else
2058                 resp->status = puterrno3(error);
2059 out1:
2060         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2061             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2062             MKDIR3res *, resp);
2063         if (dvp != NULL)
2064                 VN_RELE(dvp);
2065         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2066 }
2067 
2068 void *
2069 rfs3_mkdir_getfh(MKDIR3args *args)
2070 {
2071 
2072         return (&args->where.dir);
2073 }
2074 
2075 void
2076 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2077     struct svc_req *req, cred_t *cr, bool_t ro)
2078 {
2079         int error;
2080         vnode_t *vp;
2081         vnode_t *dvp;
2082         struct vattr *vap;
2083         struct vattr va;
2084         struct vattr *dbvap;
2085         struct vattr dbva;
2086         struct vattr *davap;
2087         struct vattr dava;
2088         struct sockaddr *ca;
2089         char *name = NULL;
2090         char *symdata = NULL;
2091 
2092         dbvap = NULL;
2093         davap = NULL;
2094 
2095         dvp = nfs3_fhtovp(&args->where.dir, exi);
2096 
2097         DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2098             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2099             SYMLINK3args *, args);
2100 
2101         if (dvp == NULL) {
2102                 error = ESTALE;
2103                 goto err;
2104         }
2105 
2106         dbva.va_mask = AT_ALL;
2107         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2108         davap = dbvap;
2109 
2110         if (args->where.name == nfs3nametoolong) {
2111                 resp->status = NFS3ERR_NAMETOOLONG;
2112                 goto err1;
2113         }
2114 
2115         if (args->where.name == NULL || *(args->where.name) == '\0') {
2116                 resp->status = NFS3ERR_ACCES;
2117                 goto err1;
2118         }
2119 
2120         if (rdonly(ro, dvp)) {
2121                 resp->status = NFS3ERR_ROFS;
2122                 goto err1;
2123         }
2124 
2125         if (is_system_labeled()) {
2126                 bslabel_t *clabel = req->rq_label;
2127 
2128                 ASSERT(clabel != NULL);
2129                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2130                     "got client label from request(1)", struct svc_req *, req);
2131 
2132                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2133                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2134                             exi)) {
2135                                 resp->status = NFS3ERR_ACCES;
2136                                 goto err1;
2137                         }
2138                 }
2139         }
2140 
2141         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2142         if (error)
2143                 goto err;
2144 
2145         if (!(va.va_mask & AT_MODE)) {
2146                 resp->status = NFS3ERR_INVAL;
2147                 goto err1;
2148         }
2149 
2150         if (args->symlink.symlink_data == nfs3nametoolong) {
2151                 resp->status = NFS3ERR_NAMETOOLONG;
2152                 goto err1;
2153         }
2154 
2155         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2156         name = nfscmd_convname(ca, exi, args->where.name,
2157             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2158 
2159         if (name == NULL) {
2160                 /* This is really a Solaris EILSEQ */
2161                 resp->status = NFS3ERR_INVAL;
2162                 goto err1;
2163         }
2164 
2165         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2166             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2167         if (symdata == NULL) {
2168                 /* This is really a Solaris EILSEQ */
2169                 resp->status = NFS3ERR_INVAL;
2170                 goto err1;
2171         }
2172 
2173 
2174         va.va_mask |= AT_TYPE;
2175         va.va_type = VLNK;
2176 
2177         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2178 
2179         dava.va_mask = AT_ALL;
2180         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2181 
2182         if (error)
2183                 goto err;
2184 
2185         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2186             NULL, NULL, NULL);
2187 
2188         /*
2189          * Force modified data and metadata out to stable storage.
2190          */
2191         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2192 
2193 
2194         resp->status = NFS3_OK;
2195         if (error) {
2196                 resp->resok.obj.handle_follows = FALSE;
2197                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2198                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2199                 goto out;
2200         }
2201 
2202         error = makefh3(&resp->resok.obj.handle, vp, exi);
2203         if (error)
2204                 resp->resok.obj.handle_follows = FALSE;
2205         else
2206                 resp->resok.obj.handle_follows = TRUE;
2207 
2208         va.va_mask = AT_ALL;
2209         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2210 
2211         /*
2212          * Force modified data and metadata out to stable storage.
2213          */
2214         (void) VOP_FSYNC(vp, 0, cr, NULL);
2215 
2216         VN_RELE(vp);
2217 
2218         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2219         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2220         goto out;
2221 
2222 err:
2223         if (curthread->t_flag & T_WOULDBLOCK) {
2224                 curthread->t_flag &= ~T_WOULDBLOCK;
2225                 resp->status = NFS3ERR_JUKEBOX;
2226         } else
2227                 resp->status = puterrno3(error);
2228 err1:
2229         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2230 out:
2231         if (name != NULL && name != args->where.name)
2232                 kmem_free(name, MAXPATHLEN + 1);
2233         if (symdata != NULL && symdata != args->symlink.symlink_data)
2234                 kmem_free(symdata, MAXPATHLEN + 1);
2235 
2236         DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2237             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2238             SYMLINK3res *, resp);
2239 
2240         if (dvp != NULL)
2241                 VN_RELE(dvp);
2242 }
2243 
2244 void *
2245 rfs3_symlink_getfh(SYMLINK3args *args)
2246 {
2247 
2248         return (&args->where.dir);
2249 }
2250 
2251 void
2252 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2253     struct svc_req *req, cred_t *cr, bool_t ro)
2254 {
2255         int error;
2256         vnode_t *vp;
2257         vnode_t *realvp;
2258         vnode_t *dvp;
2259         struct vattr *vap;
2260         struct vattr va;
2261         struct vattr *dbvap;
2262         struct vattr dbva;
2263         struct vattr *davap;
2264         struct vattr dava;
2265         int mode;
2266         enum vcexcl excl;
2267         struct sockaddr *ca;
2268         char *name = NULL;
2269 
2270         dbvap = NULL;
2271         davap = NULL;
2272 
2273         dvp = nfs3_fhtovp(&args->where.dir, exi);
2274 
2275         DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2276             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2277             MKNOD3args *, args);
2278 
2279         if (dvp == NULL) {
2280                 error = ESTALE;
2281                 goto out;
2282         }
2283 
2284         dbva.va_mask = AT_ALL;
2285         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2286         davap = dbvap;
2287 
2288         if (args->where.name == nfs3nametoolong) {
2289                 resp->status = NFS3ERR_NAMETOOLONG;
2290                 goto out1;
2291         }
2292 
2293         if (args->where.name == NULL || *(args->where.name) == '\0') {
2294                 resp->status = NFS3ERR_ACCES;
2295                 goto out1;
2296         }
2297 
2298         if (rdonly(ro, dvp)) {
2299                 resp->status = NFS3ERR_ROFS;
2300                 goto out1;
2301         }
2302 
2303         if (is_system_labeled()) {
2304                 bslabel_t *clabel = req->rq_label;
2305 
2306                 ASSERT(clabel != NULL);
2307                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2308                     "got client label from request(1)", struct svc_req *, req);
2309 
2310                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2311                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2312                             exi)) {
2313                                 resp->status = NFS3ERR_ACCES;
2314                                 goto out1;
2315                         }
2316                 }
2317         }
2318 
2319         switch (args->what.type) {
2320         case NF3CHR:
2321         case NF3BLK:
2322                 error = sattr3_to_vattr(
2323                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2324                 if (error)
2325                         goto out;
2326                 if (secpolicy_sys_devices(cr) != 0) {
2327                         resp->status = NFS3ERR_PERM;
2328                         goto out1;
2329                 }
2330                 if (args->what.type == NF3CHR)
2331                         va.va_type = VCHR;
2332                 else
2333                         va.va_type = VBLK;
2334                 va.va_rdev = makedevice(
2335                     args->what.mknoddata3_u.device.spec.specdata1,
2336                     args->what.mknoddata3_u.device.spec.specdata2);
2337                 va.va_mask |= AT_TYPE | AT_RDEV;
2338                 break;
2339         case NF3SOCK:
2340                 error = sattr3_to_vattr(
2341                     &args->what.mknoddata3_u.pipe_attributes, &va);
2342                 if (error)
2343                         goto out;
2344                 va.va_type = VSOCK;
2345                 va.va_mask |= AT_TYPE;
2346                 break;
2347         case NF3FIFO:
2348                 error = sattr3_to_vattr(
2349                     &args->what.mknoddata3_u.pipe_attributes, &va);
2350                 if (error)
2351                         goto out;
2352                 va.va_type = VFIFO;
2353                 va.va_mask |= AT_TYPE;
2354                 break;
2355         default:
2356                 resp->status = NFS3ERR_BADTYPE;
2357                 goto out1;
2358         }
2359 
2360         /*
2361          * Must specify the mode.
2362          */
2363         if (!(va.va_mask & AT_MODE)) {
2364                 resp->status = NFS3ERR_INVAL;
2365                 goto out1;
2366         }
2367 
2368         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2369         name = nfscmd_convname(ca, exi, args->where.name,
2370             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2371 
2372         if (name == NULL) {
2373                 resp->status = NFS3ERR_INVAL;
2374                 goto out1;
2375         }
2376 
2377         excl = EXCL;
2378 
2379         mode = 0;
2380 
2381         error = VOP_CREATE(dvp, name, &va, excl, mode,
2382             &vp, cr, 0, NULL, NULL);
2383 
2384         if (name != args->where.name)
2385                 kmem_free(name, MAXPATHLEN + 1);
2386 
2387         dava.va_mask = AT_ALL;
2388         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2389 
2390         /*
2391          * Force modified data and metadata out to stable storage.
2392          */
2393         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2394 
2395         if (error)
2396                 goto out;
2397 
2398         resp->status = NFS3_OK;
2399 
2400         error = makefh3(&resp->resok.obj.handle, vp, exi);
2401         if (error)
2402                 resp->resok.obj.handle_follows = FALSE;
2403         else
2404                 resp->resok.obj.handle_follows = TRUE;
2405 
2406         va.va_mask = AT_ALL;
2407         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2408 
2409         /*
2410          * Force modified metadata out to stable storage.
2411          *
2412          * if a underlying vp exists, pass it to VOP_FSYNC
2413          */
2414         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2415                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2416         else
2417                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2418 
2419         VN_RELE(vp);
2420 
2421         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2422         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2423         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2424             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2425             MKNOD3res *, resp);
2426         VN_RELE(dvp);
2427         return;
2428 
2429 out:
2430         if (curthread->t_flag & T_WOULDBLOCK) {
2431                 curthread->t_flag &= ~T_WOULDBLOCK;
2432                 resp->status = NFS3ERR_JUKEBOX;
2433         } else
2434                 resp->status = puterrno3(error);
2435 out1:
2436         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2437             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2438             MKNOD3res *, resp);
2439         if (dvp != NULL)
2440                 VN_RELE(dvp);
2441         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2442 }
2443 
2444 void *
2445 rfs3_mknod_getfh(MKNOD3args *args)
2446 {
2447 
2448         return (&args->where.dir);
2449 }
2450 
2451 void
2452 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2453     struct svc_req *req, cred_t *cr, bool_t ro)
2454 {
2455         int error = 0;
2456         vnode_t *vp;
2457         struct vattr *bvap;
2458         struct vattr bva;
2459         struct vattr *avap;
2460         struct vattr ava;
2461         vnode_t *targvp = NULL;
2462         struct sockaddr *ca;
2463         char *name = NULL;
2464 
2465         bvap = NULL;
2466         avap = NULL;
2467 
2468         vp = nfs3_fhtovp(&args->object.dir, exi);
2469 
2470         DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2471             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2472             REMOVE3args *, args);
2473 
2474         if (vp == NULL) {
2475                 error = ESTALE;
2476                 goto err;
2477         }
2478 
2479         bva.va_mask = AT_ALL;
2480         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2481         avap = bvap;
2482 
2483         if (vp->v_type != VDIR) {
2484                 resp->status = NFS3ERR_NOTDIR;
2485                 goto err1;
2486         }
2487 
2488         if (args->object.name == nfs3nametoolong) {
2489                 resp->status = NFS3ERR_NAMETOOLONG;
2490                 goto err1;
2491         }
2492 
2493         if (args->object.name == NULL || *(args->object.name) == '\0') {
2494                 resp->status = NFS3ERR_ACCES;
2495                 goto err1;
2496         }
2497 
2498         if (rdonly(ro, vp)) {
2499                 resp->status = NFS3ERR_ROFS;
2500                 goto err1;
2501         }
2502 
2503         if (is_system_labeled()) {
2504                 bslabel_t *clabel = req->rq_label;
2505 
2506                 ASSERT(clabel != NULL);
2507                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2508                     "got client label from request(1)", struct svc_req *, req);
2509 
2510                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2511                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2512                             exi)) {
2513                                 resp->status = NFS3ERR_ACCES;
2514                                 goto err1;
2515                         }
2516                 }
2517         }
2518 
2519         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2520         name = nfscmd_convname(ca, exi, args->object.name,
2521             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2522 
2523         if (name == NULL) {
2524                 resp->status = NFS3ERR_INVAL;
2525                 goto err1;
2526         }
2527 
2528         /*
2529          * Check for a conflict with a non-blocking mandatory share
2530          * reservation and V4 delegations
2531          */
2532         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2533             NULL, cr, NULL, NULL, NULL);
2534         if (error != 0)
2535                 goto err;
2536 
2537         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2538                 resp->status = NFS3ERR_JUKEBOX;
2539                 goto err1;
2540         }
2541 
2542         if (!nbl_need_check(targvp)) {
2543                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2544         } else {
2545                 nbl_start_crit(targvp, RW_READER);
2546                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2547                         error = EACCES;
2548                 } else {
2549                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2550                 }
2551                 nbl_end_crit(targvp);
2552         }
2553         VN_RELE(targvp);
2554         targvp = NULL;
2555 
2556         ava.va_mask = AT_ALL;
2557         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2558 
2559         /*
2560          * Force modified data and metadata out to stable storage.
2561          */
2562         (void) VOP_FSYNC(vp, 0, cr, NULL);
2563 
2564         if (error)
2565                 goto err;
2566 
2567         resp->status = NFS3_OK;
2568         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2569         goto out;
2570 
2571 err:
2572         if (curthread->t_flag & T_WOULDBLOCK) {
2573                 curthread->t_flag &= ~T_WOULDBLOCK;
2574                 resp->status = NFS3ERR_JUKEBOX;
2575         } else
2576                 resp->status = puterrno3(error);
2577 err1:
2578         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2579 out:
2580         DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2581             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2582             REMOVE3res *, resp);
2583 
2584         if (name != NULL && name != args->object.name)
2585                 kmem_free(name, MAXPATHLEN + 1);
2586 
2587         if (vp != NULL)
2588                 VN_RELE(vp);
2589 }
2590 
2591 void *
2592 rfs3_remove_getfh(REMOVE3args *args)
2593 {
2594 
2595         return (&args->object.dir);
2596 }
2597 
2598 void
2599 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2600     struct svc_req *req, cred_t *cr, bool_t ro)
2601 {
2602         int error;
2603         vnode_t *vp;
2604         struct vattr *bvap;
2605         struct vattr bva;
2606         struct vattr *avap;
2607         struct vattr ava;
2608         struct sockaddr *ca;
2609         char *name = NULL;
2610 
2611         bvap = NULL;
2612         avap = NULL;
2613 
2614         vp = nfs3_fhtovp(&args->object.dir, exi);
2615 
2616         DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2617             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2618             RMDIR3args *, args);
2619 
2620         if (vp == NULL) {
2621                 error = ESTALE;
2622                 goto err;
2623         }
2624 
2625         bva.va_mask = AT_ALL;
2626         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2627         avap = bvap;
2628 
2629         if (vp->v_type != VDIR) {
2630                 resp->status = NFS3ERR_NOTDIR;
2631                 goto err1;
2632         }
2633 
2634         if (args->object.name == nfs3nametoolong) {
2635                 resp->status = NFS3ERR_NAMETOOLONG;
2636                 goto err1;
2637         }
2638 
2639         if (args->object.name == NULL || *(args->object.name) == '\0') {
2640                 resp->status = NFS3ERR_ACCES;
2641                 goto err1;
2642         }
2643 
2644         if (rdonly(ro, vp)) {
2645                 resp->status = NFS3ERR_ROFS;
2646                 goto err1;
2647         }
2648 
2649         if (is_system_labeled()) {
2650                 bslabel_t *clabel = req->rq_label;
2651 
2652                 ASSERT(clabel != NULL);
2653                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2654                     "got client label from request(1)", struct svc_req *, req);
2655 
2656                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2657                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2658                             exi)) {
2659                                 resp->status = NFS3ERR_ACCES;
2660                                 goto err1;
2661                         }
2662                 }
2663         }
2664 
2665         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2666         name = nfscmd_convname(ca, exi, args->object.name,
2667             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2668 
2669         if (name == NULL) {
2670                 resp->status = NFS3ERR_INVAL;
2671                 goto err1;
2672         }
2673 
2674         error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2675 
2676         if (name != args->object.name)
2677                 kmem_free(name, MAXPATHLEN + 1);
2678 
2679         ava.va_mask = AT_ALL;
2680         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2681 
2682         /*
2683          * Force modified data and metadata out to stable storage.
2684          */
2685         (void) VOP_FSYNC(vp, 0, cr, NULL);
2686 
2687         if (error) {
2688                 /*
2689                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2690                  * if the directory is not empty.  A System V NFS server
2691                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2692                  * over the wire.
2693                  */
2694                 if (error == EEXIST)
2695                         error = ENOTEMPTY;
2696                 goto err;
2697         }
2698 
2699         resp->status = NFS3_OK;
2700         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2701         goto out;
2702 
2703 err:
2704         if (curthread->t_flag & T_WOULDBLOCK) {
2705                 curthread->t_flag &= ~T_WOULDBLOCK;
2706                 resp->status = NFS3ERR_JUKEBOX;
2707         } else
2708                 resp->status = puterrno3(error);
2709 err1:
2710         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2711 out:
2712         DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2713             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2714             RMDIR3res *, resp);
2715         if (vp != NULL)
2716                 VN_RELE(vp);
2717 
2718 }
2719 
2720 void *
2721 rfs3_rmdir_getfh(RMDIR3args *args)
2722 {
2723 
2724         return (&args->object.dir);
2725 }
2726 
2727 void
2728 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2729     struct svc_req *req, cred_t *cr, bool_t ro)
2730 {
2731         int error = 0;
2732         vnode_t *fvp;
2733         vnode_t *tvp;
2734         vnode_t *targvp;
2735         struct vattr *fbvap;
2736         struct vattr fbva;
2737         struct vattr *favap;
2738         struct vattr fava;
2739         struct vattr *tbvap;
2740         struct vattr tbva;
2741         struct vattr *tavap;
2742         struct vattr tava;
2743         nfs_fh3 *fh3;
2744         struct exportinfo *to_exi;
2745         vnode_t *srcvp = NULL;
2746         bslabel_t *clabel;
2747         struct sockaddr *ca;
2748         char *name = NULL;
2749         char *toname = NULL;
2750 
2751         fbvap = NULL;
2752         favap = NULL;
2753         tbvap = NULL;
2754         tavap = NULL;
2755         tvp = NULL;
2756 
2757         fvp = nfs3_fhtovp(&args->from.dir, exi);
2758 
2759         DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2760             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2761             RENAME3args *, args);
2762 
2763         if (fvp == NULL) {
2764                 error = ESTALE;
2765                 goto err;
2766         }
2767 
2768         if (is_system_labeled()) {
2769                 clabel = req->rq_label;
2770                 ASSERT(clabel != NULL);
2771                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2772                     "got client label from request(1)", struct svc_req *, req);
2773 
2774                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2775                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2776                             exi)) {
2777                                 resp->status = NFS3ERR_ACCES;
2778                                 goto err1;
2779                         }
2780                 }
2781         }
2782 
2783         fbva.va_mask = AT_ALL;
2784         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2785         favap = fbvap;
2786 
2787         fh3 = &args->to.dir;
2788         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2789         if (to_exi == NULL) {
2790                 resp->status = NFS3ERR_ACCES;
2791                 goto err1;
2792         }
2793         exi_rele(to_exi);
2794 
2795         if (to_exi != exi) {
2796                 resp->status = NFS3ERR_XDEV;
2797                 goto err1;
2798         }
2799 
2800         tvp = nfs3_fhtovp(&args->to.dir, exi);
2801         if (tvp == NULL) {
2802                 error = ESTALE;
2803                 goto err;
2804         }
2805 
2806         tbva.va_mask = AT_ALL;
2807         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2808         tavap = tbvap;
2809 
2810         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2811                 resp->status = NFS3ERR_NOTDIR;
2812                 goto err1;
2813         }
2814 
2815         if (args->from.name == nfs3nametoolong ||
2816             args->to.name == nfs3nametoolong) {
2817                 resp->status = NFS3ERR_NAMETOOLONG;
2818                 goto err1;
2819         }
2820         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2821             args->to.name == NULL || *(args->to.name) == '\0') {
2822                 resp->status = NFS3ERR_ACCES;
2823                 goto err1;
2824         }
2825 
2826         if (rdonly(ro, tvp)) {
2827                 resp->status = NFS3ERR_ROFS;
2828                 goto err1;
2829         }
2830 
2831         if (is_system_labeled()) {
2832                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2833                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2834                             exi)) {
2835                                 resp->status = NFS3ERR_ACCES;
2836                                 goto err1;
2837                         }
2838                 }
2839         }
2840 
2841         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2842         name = nfscmd_convname(ca, exi, args->from.name,
2843             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2844 
2845         if (name == NULL) {
2846                 resp->status = NFS3ERR_INVAL;
2847                 goto err1;
2848         }
2849 
2850         toname = nfscmd_convname(ca, exi, args->to.name,
2851             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2852 
2853         if (toname == NULL) {
2854                 resp->status = NFS3ERR_INVAL;
2855                 goto err1;
2856         }
2857 
2858         /*
2859          * Check for a conflict with a non-blocking mandatory share
2860          * reservation or V4 delegations.
2861          */
2862         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2863             NULL, cr, NULL, NULL, NULL);
2864         if (error != 0)
2865                 goto err;
2866 
2867         /*
2868          * If we rename a delegated file we should recall the
2869          * delegation, since future opens should fail or would
2870          * refer to a new file.
2871          */
2872         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2873                 resp->status = NFS3ERR_JUKEBOX;
2874                 goto err1;
2875         }
2876 
2877         /*
2878          * Check for renaming over a delegated file.  Check nfs4_deleg_policy
2879          * first to avoid VOP_LOOKUP if possible.
2880          */
2881         if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2882             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2883             NULL, NULL, NULL) == 0) {
2884 
2885                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2886                         VN_RELE(targvp);
2887                         resp->status = NFS3ERR_JUKEBOX;
2888                         goto err1;
2889                 }
2890                 VN_RELE(targvp);
2891         }
2892 
2893         if (!nbl_need_check(srcvp)) {
2894                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2895         } else {
2896                 nbl_start_crit(srcvp, RW_READER);
2897                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2898                         error = EACCES;
2899                 else
2900                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2901                 nbl_end_crit(srcvp);
2902         }
2903         if (error == 0)
2904                 vn_renamepath(tvp, srcvp, args->to.name,
2905                     strlen(args->to.name));
2906         VN_RELE(srcvp);
2907         srcvp = NULL;
2908 
2909         fava.va_mask = AT_ALL;
2910         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2911         tava.va_mask = AT_ALL;
2912         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2913 
2914         /*
2915          * Force modified data and metadata out to stable storage.
2916          */
2917         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2918         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2919 
2920         if (error)
2921                 goto err;
2922 
2923         resp->status = NFS3_OK;
2924         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2925         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2926         goto out;
2927 
2928 err:
2929         if (curthread->t_flag & T_WOULDBLOCK) {
2930                 curthread->t_flag &= ~T_WOULDBLOCK;
2931                 resp->status = NFS3ERR_JUKEBOX;
2932         } else {
2933                 resp->status = puterrno3(error);
2934         }
2935 err1:
2936         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2937         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2938 
2939 out:
2940         if (name != NULL && name != args->from.name)
2941                 kmem_free(name, MAXPATHLEN + 1);
2942         if (toname != NULL && toname != args->to.name)
2943                 kmem_free(toname, MAXPATHLEN + 1);
2944 
2945         DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2946             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2947             RENAME3res *, resp);
2948         if (fvp != NULL)
2949                 VN_RELE(fvp);
2950         if (tvp != NULL)
2951                 VN_RELE(tvp);
2952 }
2953 
2954 void *
2955 rfs3_rename_getfh(RENAME3args *args)
2956 {
2957 
2958         return (&args->from.dir);
2959 }
2960 
2961 void
2962 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2963     struct svc_req *req, cred_t *cr, bool_t ro)
2964 {
2965         int error;
2966         vnode_t *vp;
2967         vnode_t *dvp;
2968         struct vattr *vap;
2969         struct vattr va;
2970         struct vattr *bvap;
2971         struct vattr bva;
2972         struct vattr *avap;
2973         struct vattr ava;
2974         nfs_fh3 *fh3;
2975         struct exportinfo *to_exi;
2976         bslabel_t *clabel;
2977         struct sockaddr *ca;
2978         char *name = NULL;
2979 
2980         vap = NULL;
2981         bvap = NULL;
2982         avap = NULL;
2983         dvp = NULL;
2984 
2985         vp = nfs3_fhtovp(&args->file, exi);
2986 
2987         DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2988             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2989             LINK3args *, args);
2990 
2991         if (vp == NULL) {
2992                 error = ESTALE;
2993                 goto out;
2994         }
2995 
2996         va.va_mask = AT_ALL;
2997         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2998 
2999         fh3 = &args->link.dir;
3000         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3001         if (to_exi == NULL) {
3002                 resp->status = NFS3ERR_ACCES;
3003                 goto out1;
3004         }
3005         exi_rele(to_exi);
3006 
3007         if (to_exi != exi) {
3008                 resp->status = NFS3ERR_XDEV;
3009                 goto out1;
3010         }
3011 
3012         if (is_system_labeled()) {
3013                 clabel = req->rq_label;
3014 
3015                 ASSERT(clabel != NULL);
3016                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3017                     "got client label from request(1)", struct svc_req *, req);
3018 
3019                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3020                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3021                             exi)) {
3022                                 resp->status = NFS3ERR_ACCES;
3023                                 goto out1;
3024                         }
3025                 }
3026         }
3027 
3028         dvp = nfs3_fhtovp(&args->link.dir, exi);
3029         if (dvp == NULL) {
3030                 error = ESTALE;
3031                 goto out;
3032         }
3033 
3034         bva.va_mask = AT_ALL;
3035         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3036 
3037         if (dvp->v_type != VDIR) {
3038                 resp->status = NFS3ERR_NOTDIR;
3039                 goto out1;
3040         }
3041 
3042         if (args->link.name == nfs3nametoolong) {
3043                 resp->status = NFS3ERR_NAMETOOLONG;
3044                 goto out1;
3045         }
3046 
3047         if (args->link.name == NULL || *(args->link.name) == '\0') {
3048                 resp->status = NFS3ERR_ACCES;
3049                 goto out1;
3050         }
3051 
3052         if (rdonly(ro, dvp)) {
3053                 resp->status = NFS3ERR_ROFS;
3054                 goto out1;
3055         }
3056 
3057         if (is_system_labeled()) {
3058                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3059                     "got client label from request(1)", struct svc_req *, req);
3060 
3061                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3062                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3063                             exi)) {
3064                                 resp->status = NFS3ERR_ACCES;
3065                                 goto out1;
3066                         }
3067                 }
3068         }
3069 
3070         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3071         name = nfscmd_convname(ca, exi, args->link.name,
3072             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3073 
3074         if (name == NULL) {
3075                 resp->status = NFS3ERR_SERVERFAULT;
3076                 goto out1;
3077         }
3078 
3079         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3080 
3081         va.va_mask = AT_ALL;
3082         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3083         ava.va_mask = AT_ALL;
3084         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3085 
3086         /*
3087          * Force modified data and metadata out to stable storage.
3088          */
3089         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3090         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3091 
3092         if (error)
3093                 goto out;
3094 
3095         VN_RELE(dvp);
3096 
3097         resp->status = NFS3_OK;
3098         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3099         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3100 
3101         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3102             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3103             LINK3res *, resp);
3104 
3105         VN_RELE(vp);
3106 
3107         return;
3108 
3109 out:
3110         if (curthread->t_flag & T_WOULDBLOCK) {
3111                 curthread->t_flag &= ~T_WOULDBLOCK;
3112                 resp->status = NFS3ERR_JUKEBOX;
3113         } else
3114                 resp->status = puterrno3(error);
3115 out1:
3116         if (name != NULL && name != args->link.name)
3117                 kmem_free(name, MAXPATHLEN + 1);
3118 
3119         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3120             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3121             LINK3res *, resp);
3122 
3123         if (vp != NULL)
3124                 VN_RELE(vp);
3125         if (dvp != NULL)
3126                 VN_RELE(dvp);
3127         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3128         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3129 }
3130 
3131 void *
3132 rfs3_link_getfh(LINK3args *args)
3133 {
3134 
3135         return (&args->file);
3136 }
3137 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries the directory attributes plus exactly one
 * directory entry whose name is `length' bytes long.  If the count in
 * the incoming request is larger than this, we are guaranteed to be
 * able to return at least one directory entry if one exists, so there
 * is no need to check for NFS3ERR_TOOSMALL.  If the requested count is
 * not larger than this, the caller must check whether that error needs
 * to be returned.
 *
 * The fixed part is made up of the following, in XDR units:
 *
 *      status                  1
 *      attributes flag         1
 *      cookie verifier         2
 *      attributes              NFS3_SIZEOF_FATTR3
 *      entry-follows boolean   1
 *      file id                 2
 *      name length             1
 *      cookie                  2
 *      end of list             1
 *      end of file             1
 *
 * each multiplied by BYTES_PER_XDR_UNIT, plus the name length rounded
 * up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (roundup((length), BYTES_PER_XDR_UNIT) + \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
            BYTES_PER_XDR_UNIT)
3166 
3167 /* ARGSUSED */
3168 void
3169 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3170     struct svc_req *req, cred_t *cr, bool_t ro)
3171 {
3172         int error;
3173         vnode_t *vp;
3174         struct vattr *vap;
3175         struct vattr va;
3176         struct iovec iov;
3177         struct uio uio;
3178         char *data;
3179         int iseof;
3180         int bufsize;
3181         int namlen;
3182         uint_t count;
3183         struct sockaddr *ca;
3184 
3185         vap = NULL;
3186 
3187         vp = nfs3_fhtovp(&args->dir, exi);
3188 
3189         DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3190             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3191             READDIR3args *, args);
3192 
3193         if (vp == NULL) {
3194                 error = ESTALE;
3195                 goto out;
3196         }
3197 
3198         if (is_system_labeled()) {
3199                 bslabel_t *clabel = req->rq_label;
3200 
3201                 ASSERT(clabel != NULL);
3202                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3203                     "got client label from request(1)", struct svc_req *, req);
3204 
3205                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3206                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3207                             exi)) {
3208                                 resp->status = NFS3ERR_ACCES;
3209                                 goto out1;
3210                         }
3211                 }
3212         }
3213 
3214         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3215 
3216         va.va_mask = AT_ALL;
3217         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3218 
3219         if (vp->v_type != VDIR) {
3220                 resp->status = NFS3ERR_NOTDIR;
3221                 goto out1;
3222         }
3223 
3224         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3225         if (error)
3226                 goto out;
3227 
3228         /*
3229          * Now don't allow arbitrary count to alloc;
3230          * allow the maximum not to exceed rfs3_tsize()
3231          */
3232         if (args->count > rfs3_tsize(req))
3233                 args->count = rfs3_tsize(req);
3234 
3235         /*
3236          * Make sure that there is room to read at least one entry
3237          * if any are available.
3238          */
3239         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3240                 count = DIRENT64_RECLEN(MAXNAMELEN);
3241         else
3242                 count = args->count;
3243 
3244         data = kmem_alloc(count, KM_SLEEP);
3245 
3246         iov.iov_base = data;
3247         iov.iov_len = count;
3248         uio.uio_iov = &iov;
3249         uio.uio_iovcnt = 1;
3250         uio.uio_segflg = UIO_SYSSPACE;
3251         uio.uio_extflg = UIO_COPY_CACHED;
3252         uio.uio_loffset = (offset_t)args->cookie;
3253         uio.uio_resid = count;
3254 
3255         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3256 
3257         va.va_mask = AT_ALL;
3258         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3259 
3260         if (error) {
3261                 kmem_free(data, count);
3262                 goto out;
3263         }
3264 
3265         /*
3266          * If the count was not large enough to be able to guarantee
3267          * to be able to return at least one entry, then need to
3268          * check to see if NFS3ERR_TOOSMALL should be returned.
3269          */
3270         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3271                 /*
3272                  * bufsize is used to keep track of the size of the response.
3273                  * It is primed with:
3274                  *      1 for the status +
3275                  *      1 for the dir_attributes.attributes boolean +
3276                  *      2 for the cookie verifier
3277                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3278                  * to bytes.  If there are directory attributes to be
3279                  * returned, then:
3280                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3281                  * time BYTES_PER_XDR_UNIT is added to account for them.
3282                  */
3283                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3284                 if (vap != NULL)
3285                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3286                 /*
3287                  * An entry is composed of:
3288                  *      1 for the true/false list indicator +
3289                  *      2 for the fileid +
3290                  *      1 for the length of the name +
3291                  *      2 for the cookie +
3292                  * all times BYTES_PER_XDR_UNIT to convert from
3293                  * XDR units to bytes, plus the length of the name
3294                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3295                  */
3296                 if (count != uio.uio_resid) {
3297                         namlen = strlen(((struct dirent64 *)data)->d_name);
3298                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3299                             roundup(namlen, BYTES_PER_XDR_UNIT);
3300                 }
3301                 /*
3302                  * We need to check to see if the number of bytes left
3303                  * to go into the buffer will actually fit into the
3304                  * buffer.  This is calculated as the size of this
3305                  * entry plus:
3306                  *      1 for the true/false list indicator +
3307                  *      1 for the eof indicator
3308                  * times BYTES_PER_XDR_UNIT to convert from from
3309                  * XDR units to bytes.
3310                  */
3311                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3312                 if (bufsize > args->count) {
3313                         kmem_free(data, count);
3314                         resp->status = NFS3ERR_TOOSMALL;
3315                         goto out1;
3316                 }
3317         }
3318 
3319         /*
3320          * Have a valid readir buffer for the native character
3321          * set. Need to check if a conversion is necessary and
3322          * potentially rewrite the whole buffer. Note that if the
3323          * conversion expands names enough, the structure may not
3324          * fit. In this case, we need to drop entries until if fits
3325          * and patch the counts in order that the next readdir will
3326          * get the correct entries.
3327          */
3328         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3329         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3330 
3331 
3332         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3333 
3334 #if 0 /* notyet */
3335         /*
3336          * Don't do this.  It causes local disk writes when just
3337          * reading the file and the overhead is deemed larger
3338          * than the benefit.
3339          */
3340         /*
3341          * Force modified metadata out to stable storage.
3342          */
3343         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3344 #endif
3345 
3346         resp->status = NFS3_OK;
3347         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3348         resp->resok.cookieverf = 0;
3349         resp->resok.reply.entries = (entry3 *)data;
3350         resp->resok.reply.eof = iseof;
3351         resp->resok.size = count - uio.uio_resid;
3352         resp->resok.count = args->count;
3353         resp->resok.freecount = count;
3354 
3355         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3356             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3357             READDIR3res *, resp);
3358 
3359         VN_RELE(vp);
3360 
3361         return;
3362 
3363 out:
3364         if (curthread->t_flag & T_WOULDBLOCK) {
3365                 curthread->t_flag &= ~T_WOULDBLOCK;
3366                 resp->status = NFS3ERR_JUKEBOX;
3367         } else
3368                 resp->status = puterrno3(error);
3369 out1:
3370         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3371 
3372         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3373             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3374             READDIR3res *, resp);
3375 
3376         if (vp != NULL) {
3377                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3378                 VN_RELE(vp);
3379         }
3380         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3381 }
3382 
3383 void *
3384 rfs3_readdir_getfh(READDIR3args *args)
3385 {
3386 
3387         return (&args->dir);
3388 }
3389 
3390 void
3391 rfs3_readdir_free(READDIR3res *resp)
3392 {
3393 
3394         if (resp->status == NFS3_OK)
3395                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3396 }
3397 
/*
 * nextdp(dp) yields a pointer to the dirent64 record that follows dp,
 * found by advancing d_reclen bytes from the start of dp.  Any earlier
 * definition of nextdp is discarded first (#undef is a no-op when the
 * name is not currently defined).  Note that dp is evaluated twice.
 */
#undef nextdp
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3402 
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) computes the size, in bytes, of one
 * READDIRPLUS directory entry including its attributes and a
 * maximum-sized file handle.  If the space remaining in the response is
 * larger than this, then we are guaranteed to be able to return at least
 * one more directory entry if one exists.
 *
 * The fixed part is made up of the following, in XDR units:
 *
 *      entry-follows boolean           1
 *      file id                         2
 *      name length                     1
 *      cookie                          2
 *      attributes flag                 1
 *      attributes                      NFS3_SIZEOF_FATTR3
 *      file handle status              1
 *      file handle length              1
 *
 * each multiplied by BYTES_PER_XDR_UNIT, plus the maximum length of a
 * file handle (NFS3_MAXFHSIZE) and the name length rounded up to a
 * multiple of BYTES_PER_XDR_UNIT.
 *
 * The argument is parenthesized in the expansion so that an expression
 * argument is always treated as a single operand.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + \
        NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3426 
/*
 * Initial value of rd_unit in rfs3_readdirplus(); defaults to MAXBSIZE.
 * NOTE(review): presumably the size of each directory read issued by
 * rfs3_readdirplus() -- confirm against the rest of that function.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3428 
3429 /* ARGSUSED */
3430 void
3431 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3432     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3433 {
3434         int error;
3435         vnode_t *vp;
3436         struct vattr *vap;
3437         struct vattr va;
3438         struct iovec iov;
3439         struct uio uio;
3440         char *data;
3441         int iseof;
3442         struct dirent64 *dp;
3443         vnode_t *nvp;
3444         struct vattr *nvap;
3445         struct vattr nva;
3446         entryplus3_info *infop = NULL;
3447         int size = 0;
3448         int nents = 0;
3449         int bufsize = 0;
3450         int entrysize = 0;
3451         int tofit = 0;
3452         int rd_unit = rfs3_readdir_unit;
3453         int prev_len;
3454         int space_left;
3455         int i;
3456         uint_t *namlen = NULL;
3457         char *ndata = NULL;
3458         struct sockaddr *ca;
3459         size_t ret;
3460 
3461         vap = NULL;
3462 
3463         vp = nfs3_fhtovp(&args->dir, exi);
3464 
3465         DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3466             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3467             READDIRPLUS3args *, args);
3468 
3469         if (vp == NULL) {
3470                 error = ESTALE;
3471                 goto out;
3472         }
3473 
3474         if (is_system_labeled()) {
3475                 bslabel_t *clabel = req->rq_label;
3476 
3477                 ASSERT(clabel != NULL);
3478                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3479                     char *, "got client label from request(1)",
3480                     struct svc_req *, req);
3481 
3482                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3483                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3484                             exi)) {
3485                                 resp->status = NFS3ERR_ACCES;
3486                                 goto out1;
3487                         }
3488                 }
3489         }
3490 
3491         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3492 
3493         va.va_mask = AT_ALL;
3494         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3495 
3496         if (vp->v_type != VDIR) {
3497                 error = ENOTDIR;
3498                 goto out;
3499         }
3500 
3501         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3502         if (error)
3503                 goto out;
3504 
3505         /*
3506          * Don't allow arbitrary counts for allocation
3507          */
3508         if (args->maxcount > rfs3_tsize(req))
3509                 args->maxcount = rfs3_tsize(req);
3510 
3511         /*
3512          * Make sure that there is room to read at least one entry
3513          * if any are available
3514          */
3515         args->dircount = MIN(args->dircount, args->maxcount);
3516 
3517         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3518                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3519 
3520         /*
3521          * This allocation relies on a minimum directory entry
3522          * being roughly 24 bytes.  Therefore, the namlen array
3523          * will have enough space based on the maximum number of
3524          * entries to read.
3525          */
3526         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3527 
3528         space_left = args->dircount;
3529         data = kmem_alloc(args->dircount, KM_SLEEP);
3530         dp = (struct dirent64 *)data;
3531         uio.uio_iov = &iov;
3532         uio.uio_iovcnt = 1;
3533         uio.uio_segflg = UIO_SYSSPACE;
3534         uio.uio_extflg = UIO_COPY_CACHED;
3535         uio.uio_loffset = (offset_t)args->cookie;
3536 
3537         /*
3538          * bufsize is used to keep track of the size of the response as we
3539          * get post op attributes and filehandles for each entry.  This is
3540          * an optimization as the server may have read more entries than will
3541          * fit in the buffer specified by maxcount.  We stop calculating
3542          * post op attributes and filehandles once we have exceeded maxcount.
3543          * This will minimize the effect of truncation.
3544          *
3545          * It is primed with:
3546          *      1 for the status +
3547          *      1 for the dir_attributes.attributes boolean +
3548          *      2 for the cookie verifier
3549          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3550          * to bytes.  If there are directory attributes to be
3551          * returned, then:
3552          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3553          * time BYTES_PER_XDR_UNIT is added to account for them.
3554          */
3555         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3556         if (vap != NULL)
3557                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3558 
3559 getmoredents:
3560         /*
3561          * Here we make a check so that our read unit is not larger than
3562          * the space left in the buffer.
3563          */
3564         rd_unit = MIN(rd_unit, space_left);
3565         iov.iov_base = (char *)dp;
3566         iov.iov_len = rd_unit;
3567         uio.uio_resid = rd_unit;
3568         prev_len = rd_unit;
3569 
3570         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3571 
3572         if (error) {
3573                 kmem_free(data, args->dircount);
3574                 goto out;
3575         }
3576 
3577         if (uio.uio_resid == prev_len && !iseof) {
3578                 if (nents == 0) {
3579                         kmem_free(data, args->dircount);
3580                         resp->status = NFS3ERR_TOOSMALL;
3581                         goto out1;
3582                 }
3583 
3584                 /*
3585                  * We could not get any more entries, so get the attributes
3586                  * and filehandle for the entries already obtained.
3587                  */
3588                 goto good;
3589         }
3590 
3591         /*
3592          * We estimate the size of the response by assuming the
3593          * entry exists and attributes and filehandle are also valid
3594          */
3595         for (size = prev_len - uio.uio_resid;
3596             size > 0;
3597             size -= dp->d_reclen, dp = nextdp(dp)) {
3598 
3599                 if (dp->d_ino == 0) {
3600                         nents++;
3601                         continue;
3602                 }
3603 
3604                 namlen[nents] = strlen(dp->d_name);
3605                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3606 
3607                 /*
3608                  * We need to check to see if the number of bytes left
3609                  * to go into the buffer will actually fit into the
3610                  * buffer.  This is calculated as the size of this
3611                  * entry plus:
3612                  *      1 for the true/false list indicator +
3613                  *      1 for the eof indicator
3614                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3615                  * to bytes.
3616                  *
3617                  * Also check the dircount limit against the first entry read
3618                  *
3619                  */
3620                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3621                 if (bufsize + tofit > args->maxcount) {
3622                         /*
3623                          * We make a check here to see if this was the
3624                          * first entry being measured.  If so, then maxcount
3625                          * was too small to begin with and so we need to
3626                          * return with NFS3ERR_TOOSMALL.
3627                          */
3628                         if (nents == 0) {
3629                                 kmem_free(data, args->dircount);
3630                                 resp->status = NFS3ERR_TOOSMALL;
3631                                 goto out1;
3632                         }
3633                         iseof = FALSE;
3634                         goto good;
3635                 }
3636                 bufsize += entrysize;
3637                 nents++;
3638         }
3639 
3640         /*
3641          * If there is enough room to fit at least 1 more entry including
3642          * post op attributes and filehandle in the buffer AND that we haven't
3643          * exceeded dircount then go back and get some more.
3644          */
3645         if (!iseof &&
3646             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3647                 space_left -= (prev_len - uio.uio_resid);
3648                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3649                         goto getmoredents;
3650 
3651                 /* else, fall through */
3652         }
3653 good:
3654         va.va_mask = AT_ALL;
3655         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3656 
3657         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3658 
3659         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3660         resp->resok.infop = infop;
3661 
3662         dp = (struct dirent64 *)data;
3663         for (i = 0; i < nents; i++) {
3664 
3665                 if (dp->d_ino == 0) {
3666                         infop[i].attr.attributes = FALSE;
3667                         infop[i].fh.handle_follows = FALSE;
3668                         dp = nextdp(dp);
3669                         continue;
3670                 }
3671 
3672                 infop[i].namelen = namlen[i];
3673 
3674                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3675                     NULL, NULL, NULL);
3676                 if (error) {
3677                         infop[i].attr.attributes = FALSE;
3678                         infop[i].fh.handle_follows = FALSE;
3679                         dp = nextdp(dp);
3680                         continue;
3681                 }
3682 
3683                 nva.va_mask = AT_ALL;
3684                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3685 
3686                 /* Lie about the object type for a referral */
3687                 if (vn_is_nfs_reparse(nvp, cr))
3688                         nvap->va_type = VLNK;
3689 
3690                 if (vn_ismntpt(nvp)) {
3691                         infop[i].attr.attributes = FALSE;
3692                         infop[i].fh.handle_follows = FALSE;
3693                 } else {
3694                         vattr_to_post_op_attr(nvap, &infop[i].attr);
3695 
3696                         error = makefh3(&infop[i].fh.handle, nvp, exi);
3697                         if (!error)
3698                                 infop[i].fh.handle_follows = TRUE;
3699                         else
3700                                 infop[i].fh.handle_follows = FALSE;
3701                 }
3702 
3703                 VN_RELE(nvp);
3704                 dp = nextdp(dp);
3705         }
3706 
3707         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3708         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3709         if (ndata == NULL)
3710                 ndata = data;
3711 
3712         if (ret > 0) {
3713                 /*
3714                  * We had to drop one or more entries in order to fit
3715                  * during the character conversion.  We need to patch
3716                  * up the size and eof info.
3717                  */
3718                 if (iseof)
3719                         iseof = FALSE;
3720 
3721                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3722                     nents, ret);
3723         }
3724 
3725 
3726 #if 0 /* notyet */
3727         /*
3728          * Don't do this.  It causes local disk writes when just
3729          * reading the file and the overhead is deemed larger
3730          * than the benefit.
3731          */
3732         /*
3733          * Force modified metadata out to stable storage.
3734          */
3735         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3736 #endif
3737 
3738         kmem_free(namlen, args->dircount);
3739 
3740         resp->status = NFS3_OK;
3741         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3742         resp->resok.cookieverf = 0;
3743         resp->resok.reply.entries = (entryplus3 *)ndata;
3744         resp->resok.reply.eof = iseof;
3745         resp->resok.size = nents;
3746         resp->resok.count = args->dircount - ret;
3747         resp->resok.maxcount = args->maxcount;
3748 
3749         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3750             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3751             READDIRPLUS3res *, resp);
3752 
3753         VN_RELE(vp);
3754 
3755         return;
3756 
3757 out:
3758         if (curthread->t_flag & T_WOULDBLOCK) {
3759                 curthread->t_flag &= ~T_WOULDBLOCK;
3760                 resp->status = NFS3ERR_JUKEBOX;
3761         } else {
3762                 resp->status = puterrno3(error);
3763         }
3764 out1:
3765         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3766 
3767         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3768             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3769             READDIRPLUS3res *, resp);
3770 
3771         if (vp != NULL) {
3772                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3773                 VN_RELE(vp);
3774         }
3775 
3776         if (namlen != NULL)
3777                 kmem_free(namlen, args->dircount);
3778 
3779         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3780 }
3781 
3782 void *
3783 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3784 {
3785 
3786         return (&args->dir);
3787 }
3788 
3789 void
3790 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3791 {
3792 
3793         if (resp->status == NFS3_OK) {
3794                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3795                 kmem_free(resp->resok.infop,
3796                     resp->resok.size * sizeof (struct entryplus3_info));
3797         }
3798 }
3799 
3800 /* ARGSUSED */
3801 void
3802 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3803     struct svc_req *req, cred_t *cr, bool_t ro)
3804 {
3805         int error;
3806         vnode_t *vp;
3807         struct vattr *vap;
3808         struct vattr va;
3809         struct statvfs64 sb;
3810 
3811         vap = NULL;
3812 
3813         vp = nfs3_fhtovp(&args->fsroot, exi);
3814 
3815         DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3816             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3817             FSSTAT3args *, args);
3818 
3819         if (vp == NULL) {
3820                 error = ESTALE;
3821                 goto out;
3822         }
3823 
3824         if (is_system_labeled()) {
3825                 bslabel_t *clabel = req->rq_label;
3826 
3827                 ASSERT(clabel != NULL);
3828                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3829                     "got client label from request(1)", struct svc_req *, req);
3830 
3831                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3832                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3833                             exi)) {
3834                                 resp->status = NFS3ERR_ACCES;
3835                                 goto out1;
3836                         }
3837                 }
3838         }
3839 
3840         error = VFS_STATVFS(vp->v_vfsp, &sb);
3841 
3842         va.va_mask = AT_ALL;
3843         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3844 
3845         if (error)
3846                 goto out;
3847 
3848         resp->status = NFS3_OK;
3849         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3850         if (sb.f_blocks != (fsblkcnt64_t)-1)
3851                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3852         else
3853                 resp->resok.tbytes = (size3)sb.f_blocks;
3854         if (sb.f_bfree != (fsblkcnt64_t)-1)
3855                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3856         else
3857                 resp->resok.fbytes = (size3)sb.f_bfree;
3858         if (sb.f_bavail != (fsblkcnt64_t)-1)
3859                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3860         else
3861                 resp->resok.abytes = (size3)sb.f_bavail;
3862         resp->resok.tfiles = (size3)sb.f_files;
3863         resp->resok.ffiles = (size3)sb.f_ffree;
3864         resp->resok.afiles = (size3)sb.f_favail;
3865         resp->resok.invarsec = 0;
3866 
3867         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3868             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3869             FSSTAT3res *, resp);
3870         VN_RELE(vp);
3871 
3872         return;
3873 
3874 out:
3875         if (curthread->t_flag & T_WOULDBLOCK) {
3876                 curthread->t_flag &= ~T_WOULDBLOCK;
3877                 resp->status = NFS3ERR_JUKEBOX;
3878         } else
3879                 resp->status = puterrno3(error);
3880 out1:
3881         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3882             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3883             FSSTAT3res *, resp);
3884 
3885         if (vp != NULL)
3886                 VN_RELE(vp);
3887         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3888 }
3889 
3890 void *
3891 rfs3_fsstat_getfh(FSSTAT3args *args)
3892 {
3893 
3894         return (&args->fsroot);
3895 }
3896 
3897 /* ARGSUSED */
3898 void
3899 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3900     struct svc_req *req, cred_t *cr, bool_t ro)
3901 {
3902         vnode_t *vp;
3903         struct vattr *vap;
3904         struct vattr va;
3905         uint32_t xfer_size;
3906         ulong_t l = 0;
3907         int error;
3908 
3909         vp = nfs3_fhtovp(&args->fsroot, exi);
3910 
3911         DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3912             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3913             FSINFO3args *, args);
3914 
3915         if (vp == NULL) {
3916                 if (curthread->t_flag & T_WOULDBLOCK) {
3917                         curthread->t_flag &= ~T_WOULDBLOCK;
3918                         resp->status = NFS3ERR_JUKEBOX;
3919                 } else
3920                         resp->status = NFS3ERR_STALE;
3921                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3922                 goto out;
3923         }
3924 
3925         if (is_system_labeled()) {
3926                 bslabel_t *clabel = req->rq_label;
3927 
3928                 ASSERT(clabel != NULL);
3929                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3930                     "got client label from request(1)", struct svc_req *, req);
3931 
3932                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3933                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3934                             exi)) {
3935                                 resp->status = NFS3ERR_STALE;
3936                                 vattr_to_post_op_attr(NULL,
3937                                     &resp->resfail.obj_attributes);
3938                                 goto out;
3939                         }
3940                 }
3941         }
3942 
3943         va.va_mask = AT_ALL;
3944         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3945 
3946         resp->status = NFS3_OK;
3947         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3948         xfer_size = rfs3_tsize(req);
3949         resp->resok.rtmax = xfer_size;
3950         resp->resok.rtpref = xfer_size;
3951         resp->resok.rtmult = DEV_BSIZE;
3952         resp->resok.wtmax = xfer_size;
3953         resp->resok.wtpref = xfer_size;
3954         resp->resok.wtmult = DEV_BSIZE;
3955         resp->resok.dtpref = MAXBSIZE;
3956 
3957         /*
3958          * Large file spec: want maxfilesize based on limit of
3959          * underlying filesystem.  We can guess 2^31-1 if need be.
3960          */
3961         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3962         if (error) {
3963                 resp->status = puterrno3(error);
3964                 goto out;
3965         }
3966 
3967         /*
3968          * If the underlying file system does not support _PC_FILESIZEBITS,
3969          * return a reasonable default. Note that error code on VOP_PATHCONF
3970          * will be 0, even if the underlying file system does not support
3971          * _PC_FILESIZEBITS.
3972          */
3973         if (l == (ulong_t)-1) {
3974                 resp->resok.maxfilesize = MAXOFF32_T;
3975         } else {
3976                 if (l >= (sizeof (uint64_t) * 8))
3977                         resp->resok.maxfilesize = INT64_MAX;
3978                 else
3979                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3980         }
3981 
3982         resp->resok.time_delta.seconds = 0;
3983         resp->resok.time_delta.nseconds = 1000;
3984         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3985             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3986 
3987         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3988             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3989             FSINFO3res *, resp);
3990 
3991         VN_RELE(vp);
3992 
3993         return;
3994 
3995 out:
3996         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3997             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
3998             FSINFO3res *, resp);
3999         if (vp != NULL)
4000                 VN_RELE(vp);
4001 }
4002 
4003 void *
4004 rfs3_fsinfo_getfh(FSINFO3args *args)
4005 {
4006         return (&args->fsroot);
4007 }
4008 
4009 /* ARGSUSED */
4010 void
4011 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4012     struct svc_req *req, cred_t *cr, bool_t ro)
4013 {
4014         int error;
4015         vnode_t *vp;
4016         struct vattr *vap;
4017         struct vattr va;
4018         ulong_t val;
4019 
4020         vap = NULL;
4021 
4022         vp = nfs3_fhtovp(&args->object, exi);
4023 
4024         DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4025             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4026             PATHCONF3args *, args);
4027 
4028         if (vp == NULL) {
4029                 error = ESTALE;
4030                 goto out;
4031         }
4032 
4033         if (is_system_labeled()) {
4034                 bslabel_t *clabel = req->rq_label;
4035 
4036                 ASSERT(clabel != NULL);
4037                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4038                     "got client label from request(1)", struct svc_req *, req);
4039 
4040                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4041                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4042                             exi)) {
4043                                 resp->status = NFS3ERR_ACCES;
4044                                 goto out1;
4045                         }
4046                 }
4047         }
4048 
4049         va.va_mask = AT_ALL;
4050         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4051 
4052         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4053         if (error)
4054                 goto out;
4055         resp->resok.info.link_max = (uint32)val;
4056 
4057         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4058         if (error)
4059                 goto out;
4060         resp->resok.info.name_max = (uint32)val;
4061 
4062         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4063         if (error)
4064                 goto out;
4065         if (val == 1)
4066                 resp->resok.info.no_trunc = TRUE;
4067         else
4068                 resp->resok.info.no_trunc = FALSE;
4069 
4070         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4071         if (error)
4072                 goto out;
4073         if (val == 1)
4074                 resp->resok.info.chown_restricted = TRUE;
4075         else
4076                 resp->resok.info.chown_restricted = FALSE;
4077 
4078         resp->status = NFS3_OK;
4079         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4080         resp->resok.info.case_insensitive = FALSE;
4081         resp->resok.info.case_preserving = TRUE;
4082         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4083             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4084             PATHCONF3res *, resp);
4085         VN_RELE(vp);
4086         return;
4087 
4088 out:
4089         if (curthread->t_flag & T_WOULDBLOCK) {
4090                 curthread->t_flag &= ~T_WOULDBLOCK;
4091                 resp->status = NFS3ERR_JUKEBOX;
4092         } else
4093                 resp->status = puterrno3(error);
4094 out1:
4095         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4096             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4097             PATHCONF3res *, resp);
4098         if (vp != NULL)
4099                 VN_RELE(vp);
4100         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4101 }
4102 
4103 void *
4104 rfs3_pathconf_getfh(PATHCONF3args *args)
4105 {
4106 
4107         return (&args->object);
4108 }
4109 
4110 void
4111 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4112     struct svc_req *req, cred_t *cr, bool_t ro)
4113 {
4114         nfs3_srv_t *ns;
4115         int error;
4116         vnode_t *vp;
4117         struct vattr *bvap;
4118         struct vattr bva;
4119         struct vattr *avap;
4120         struct vattr ava;
4121 
4122         bvap = NULL;
4123         avap = NULL;
4124 
4125         vp = nfs3_fhtovp(&args->file, exi);
4126 
4127         DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4128             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4129             COMMIT3args *, args);
4130 
4131         if (vp == NULL) {
4132                 error = ESTALE;
4133                 goto out;
4134         }
4135 
4136         ns = nfs3_get_srv();
4137         bva.va_mask = AT_ALL;
4138         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4139 
4140         /*
4141          * If we can't get the attributes, then we can't do the
4142          * right access checking.  So, we'll fail the request.
4143          */
4144         if (error)
4145                 goto out;
4146 
4147         bvap = &bva;
4148 
4149         if (rdonly(ro, vp)) {
4150                 resp->status = NFS3ERR_ROFS;
4151                 goto out1;
4152         }
4153 
4154         if (vp->v_type != VREG) {
4155                 resp->status = NFS3ERR_INVAL;
4156                 goto out1;
4157         }
4158 
4159         if (is_system_labeled()) {
4160                 bslabel_t *clabel = req->rq_label;
4161 
4162                 ASSERT(clabel != NULL);
4163                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4164                     "got client label from request(1)", struct svc_req *, req);
4165 
4166                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4167                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4168                             exi)) {
4169                                 resp->status = NFS3ERR_ACCES;
4170                                 goto out1;
4171                         }
4172                 }
4173         }
4174 
4175         if (crgetuid(cr) != bva.va_uid &&
4176             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4177                 goto out;
4178 
4179         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4180 
4181         ava.va_mask = AT_ALL;
4182         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4183 
4184         if (error)
4185                 goto out;
4186 
4187         resp->status = NFS3_OK;
4188         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4189         resp->resok.verf = ns->write3verf;
4190 
4191         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4192             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4193             COMMIT3res *, resp);
4194 
4195         VN_RELE(vp);
4196 
4197         return;
4198 
4199 out:
4200         if (curthread->t_flag & T_WOULDBLOCK) {
4201                 curthread->t_flag &= ~T_WOULDBLOCK;
4202                 resp->status = NFS3ERR_JUKEBOX;
4203         } else
4204                 resp->status = puterrno3(error);
4205 out1:
4206         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4207             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4208             COMMIT3res *, resp);
4209 
4210         if (vp != NULL)
4211                 VN_RELE(vp);
4212         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4213 }
4214 
4215 void *
4216 rfs3_commit_getfh(COMMIT3args *args)
4217 {
4218 
4219         return (&args->file);
4220 }
4221 
4222 static int
4223 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4224 {
4225 
4226         vap->va_mask = 0;
4227 
4228         if (sap->mode.set_it) {
4229                 vap->va_mode = (mode_t)sap->mode.mode;
4230                 vap->va_mask |= AT_MODE;
4231         }
4232         if (sap->uid.set_it) {
4233                 vap->va_uid = (uid_t)sap->uid.uid;
4234                 vap->va_mask |= AT_UID;
4235         }
4236         if (sap->gid.set_it) {
4237                 vap->va_gid = (gid_t)sap->gid.gid;
4238                 vap->va_mask |= AT_GID;
4239         }
4240         if (sap->size.set_it) {
4241                 if (sap->size.size > (size3)((u_longlong_t)-1))
4242                         return (EINVAL);
4243                 vap->va_size = sap->size.size;
4244                 vap->va_mask |= AT_SIZE;
4245         }
4246         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4247 #ifndef _LP64
4248                 /* check time validity */
4249                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4250                         return (EOVERFLOW);
4251 #endif
4252                 /*
4253                  * nfs protocol defines times as unsigned so don't extend sign,
4254                  * unless sysadmin set nfs_allow_preepoch_time.
4255                  */
4256                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4257                     sap->atime.atime.seconds);
4258                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4259                 vap->va_mask |= AT_ATIME;
4260         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4261                 gethrestime(&vap->va_atime);
4262                 vap->va_mask |= AT_ATIME;
4263         }
4264         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4265 #ifndef _LP64
4266                 /* check time validity */
4267                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4268                         return (EOVERFLOW);
4269 #endif
4270                 /*
4271                  * nfs protocol defines times as unsigned so don't extend sign,
4272                  * unless sysadmin set nfs_allow_preepoch_time.
4273                  */
4274                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4275                     sap->mtime.mtime.seconds);
4276                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4277                 vap->va_mask |= AT_MTIME;
4278         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4279                 gethrestime(&vap->va_mtime);
4280                 vap->va_mask |= AT_MTIME;
4281         }
4282 
4283         return (0);
4284 }
4285 
4286 static const ftype3 vt_to_nf3[] = {
4287         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4288 };
4289 
4290 static int
4291 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4292 {
4293 
4294         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4295         /* Return error if time or size overflow */
4296         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4297                 return (EOVERFLOW);
4298         }
4299         fap->type = vt_to_nf3[vap->va_type];
4300         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4301         fap->nlink = (uint32)vap->va_nlink;
4302         if (vap->va_uid == UID_NOBODY)
4303                 fap->uid = (uid3)NFS_UID_NOBODY;
4304         else
4305                 fap->uid = (uid3)vap->va_uid;
4306         if (vap->va_gid == GID_NOBODY)
4307                 fap->gid = (gid3)NFS_GID_NOBODY;
4308         else
4309                 fap->gid = (gid3)vap->va_gid;
4310         fap->size = (size3)vap->va_size;
4311         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4312         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4313         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4314         fap->fsid = (uint64)vap->va_fsid;
4315         fap->fileid = (fileid3)vap->va_nodeid;
4316         fap->atime.seconds = vap->va_atime.tv_sec;
4317         fap->atime.nseconds = vap->va_atime.tv_nsec;
4318         fap->mtime.seconds = vap->va_mtime.tv_sec;
4319         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4320         fap->ctime.seconds = vap->va_ctime.tv_sec;
4321         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4322         return (0);
4323 }
4324 
4325 static int
4326 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4327 {
4328 
4329         /* Return error if time or size overflow */
4330         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4331             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4332             NFS3_SIZE_OK(vap->va_size))) {
4333                 return (EOVERFLOW);
4334         }
4335         wccap->size = (size3)vap->va_size;
4336         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4337         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4338         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4339         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4340         return (0);
4341 }
4342 
4343 static void
4344 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4345 {
4346 
4347         /* don't return attrs if time overflow */
4348         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4349                 poap->attributes = TRUE;
4350         } else
4351                 poap->attributes = FALSE;
4352 }
4353 
4354 void
4355 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4356 {
4357 
4358         /* don't return attrs if time overflow */
4359         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4360                 poap->attributes = TRUE;
4361         } else
4362                 poap->attributes = FALSE;
4363 }
4364 
4365 static void
4366 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4367 {
4368         vattr_to_pre_op_attr(bvap, &wccp->before);
4369         vattr_to_post_op_attr(avap, &wccp->after);
4370 }
4371 
4372 static int
4373 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4374 {
4375         struct clist    *wcl;
4376         int             wlist_len;
4377         count3          count = rok->count;
4378 
4379         wcl = args->wlist;
4380         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4381                 return (FALSE);
4382 
4383         wcl = args->wlist;
4384         rok->wlist_len = wlist_len;
4385         rok->wlist = wcl;
4386         return (TRUE);
4387 }
4388 
4389 void
4390 rfs3_srv_zone_init(nfs_globals_t *ng)
4391 {
4392         nfs3_srv_t *ns;
4393         struct rfs3_verf_overlay {
4394                 uint_t id; /* a "unique" identifier */
4395                 int ts; /* a unique timestamp */
4396         } *verfp;
4397         timestruc_t now;
4398 
4399         ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4400 
4401         /*
4402          * The following algorithm attempts to find a unique verifier
4403          * to be used as the write verifier returned from the server
4404          * to the client.  It is important that this verifier change
4405          * whenever the server reboots.  Of secondary importance, it
4406          * is important for the verifier to be unique between two
4407          * different servers.
4408          *
4409          * Thus, an attempt is made to use the system hostid and the
4410          * current time in seconds when the nfssrv kernel module is
4411          * loaded.  It is assumed that an NFS server will not be able
4412          * to boot and then to reboot in less than a second.  If the
4413          * hostid has not been set, then the current high resolution
4414          * time is used.  This will ensure different verifiers each
4415          * time the server reboots and minimize the chances that two
4416          * different servers will have the same verifier.
4417          */
4418 
4419 #ifndef lint
4420         /*
4421          * We ASSERT that this constant logic expression is
4422          * always true because in the past, it wasn't.
4423          */
4424         ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4425 #endif
4426 
4427         gethrestime(&now);
4428         verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4429         verfp->ts = (int)now.tv_sec;
4430         verfp->id = zone_get_hostid(NULL);
4431 
4432         if (verfp->id == 0)
4433                 verfp->id = (uint_t)now.tv_nsec;
4434 
4435         ng->nfs3_srv = ns;
4436 }
4437 
4438 void
4439 rfs3_srv_zone_fini(nfs_globals_t *ng)
4440 {
4441         nfs3_srv_t *ns = ng->nfs3_srv;
4442 
4443         ng->nfs3_srv = NULL;
4444 
4445         kmem_free(ns, sizeof (*ns));
4446 }
4447 
4448 void
4449 rfs3_srvrinit(void)
4450 {
4451         nfs3_srv_caller_id = fs_new_caller_id();
4452 }
4453 
/*
 * Counterpart of rfs3_srvrinit().  There is currently nothing to undo.
 */
void
rfs3_srvrfini(void)
{
        /* Intentionally empty. */
}