1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 
  28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /* All Rights Reserved */
  30 
  31 #include <sys/param.h>
  32 #include <sys/types.h>
  33 #include <sys/systm.h>
  34 #include <sys/cred.h>
  35 #include <sys/buf.h>
  36 #include <sys/vfs.h>
  37 #include <sys/vnode.h>
  38 #include <sys/uio.h>
  39 #include <sys/errno.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/statvfs.h>
  42 #include <sys/kmem.h>
  43 #include <sys/dirent.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/debug.h>
  46 #include <sys/systeminfo.h>
  47 #include <sys/flock.h>
  48 #include <sys/nbmlock.h>
  49 #include <sys/policy.h>
  50 #include <sys/sdt.h>
  51 
  52 #include <rpc/types.h>
  53 #include <rpc/auth.h>
  54 #include <rpc/svc.h>
  55 #include <rpc/rpc_rdma.h>
  56 
  57 #include <nfs/nfs.h>
  58 #include <nfs/export.h>
  59 #include <nfs/nfs_cmd.h>
  60 
  61 #include <sys/strsubr.h>
  62 #include <sys/tsol/label.h>
  63 #include <sys/tsol/tndb.h>
  64 
  65 #include <sys/zone.h>
  66 
  67 #include <inet/ip.h>
  68 #include <inet/ip6.h>
  69 
  70 /*
  71  * These are the interface routines for the server side of the
  72  * Network File System.  See the NFS version 3 protocol specification
  73  * for a description of this interface.
  74  */
  75 
  76 static writeverf3 write3verf;
  77 
  78 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  79 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  80 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  81 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  82 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  83 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  84 
  85 extern int nfs_loaned_buffers;
  86 
  87 u_longlong_t nfs3_srv_caller_id;
  88 
  89 /* ARGSUSED */
  90 void
  91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  92     struct svc_req *req, cred_t *cr, bool_t ro)
  93 {
  94         int error;
  95         vnode_t *vp;
  96         struct vattr va;
  97 
  98         vp = nfs3_fhtovp(&args->object, exi);
  99 
 100         DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
 101             cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
 102 
 103         if (vp == NULL) {
 104                 error = ESTALE;
 105                 goto out;
 106         }
 107 
 108         va.va_mask = AT_ALL;
 109         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 110 
 111         if (!error) {
 112                 /* Lie about the object type for a referral */
 113                 if (vn_is_nfs_reparse(vp, cr))
 114                         va.va_type = VLNK;
 115 
 116                 /* overflow error if time or size is out of range */
 117                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 118                 if (error)
 119                         goto out;
 120                 resp->status = NFS3_OK;
 121 
 122                 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 123                     cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 124 
 125                 VN_RELE(vp);
 126 
 127                 return;
 128         }
 129 
 130 out:
 131         if (curthread->t_flag & T_WOULDBLOCK) {
 132                 curthread->t_flag &= ~T_WOULDBLOCK;
 133                 resp->status = NFS3ERR_JUKEBOX;
 134         } else
 135                 resp->status = puterrno3(error);
 136 
 137         DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 138             cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 139 
 140         if (vp != NULL)
 141                 VN_RELE(vp);
 142 }
 143 
 144 void *
 145 rfs3_getattr_getfh(GETATTR3args *args)
 146 {
 147 
 148         return (&args->object);
 149 }
 150 
 151 void
 152 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 153     struct svc_req *req, cred_t *cr, bool_t ro)
 154 {
 155         int error;
 156         vnode_t *vp;
 157         struct vattr *bvap;
 158         struct vattr bva;
 159         struct vattr *avap;
 160         struct vattr ava;
 161         int flag;
 162         int in_crit = 0;
 163         struct flock64 bf;
 164         caller_context_t ct;
 165 
 166         bvap = NULL;
 167         avap = NULL;
 168 
 169         vp = nfs3_fhtovp(&args->object, exi);
 170 
 171         DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
 172             cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
 173 
 174         if (vp == NULL) {
 175                 error = ESTALE;
 176                 goto out;
 177         }
 178 
 179         error = sattr3_to_vattr(&args->new_attributes, &ava);
 180         if (error)
 181                 goto out;
 182 
 183         if (is_system_labeled()) {
 184                 bslabel_t *clabel = req->rq_label;
 185 
 186                 ASSERT(clabel != NULL);
 187                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 188                     "got client label from request(1)", struct svc_req *, req);
 189 
 190                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 191                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 192                             exi)) {
 193                                 resp->status = NFS3ERR_ACCES;
 194                                 goto out1;
 195                         }
 196                 }
 197         }
 198 
 199         /*
 200          * We need to specially handle size changes because of
 201          * possible conflicting NBMAND locks. Get into critical
 202          * region before VOP_GETATTR, so the size attribute is
 203          * valid when checking conflicts.
 204          *
 205          * Also, check to see if the v4 side of the server has
 206          * delegated this file.  If so, then we return JUKEBOX to
 207          * allow the client to retrasmit its request.
 208          */
 209         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 210                 if (nbl_need_check(vp)) {
 211                         nbl_start_crit(vp, RW_READER);
 212                         in_crit = 1;
 213                 }
 214         }
 215 
 216         bva.va_mask = AT_ALL;
 217         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 218 
 219         /*
 220          * If we can't get the attributes, then we can't do the
 221          * right access checking.  So, we'll fail the request.
 222          */
 223         if (error)
 224                 goto out;
 225 
 226         bvap = &bva;
 227 
 228         if (rdonly(ro, vp)) {
 229                 resp->status = NFS3ERR_ROFS;
 230                 goto out1;
 231         }
 232 
 233         if (args->guard.check &&
 234             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 235             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 236                 resp->status = NFS3ERR_NOT_SYNC;
 237                 goto out1;
 238         }
 239 
 240         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 241                 flag = ATTR_UTIME;
 242         else
 243                 flag = 0;
 244 
 245         /*
 246          * If the filesystem is exported with nosuid, then mask off
 247          * the setuid and setgid bits.
 248          */
 249         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 250             (exi->exi_export.ex_flags & EX_NOSUID))
 251                 ava.va_mode &= ~(VSUID | VSGID);
 252 
 253         ct.cc_sysid = 0;
 254         ct.cc_pid = 0;
 255         ct.cc_caller_id = nfs3_srv_caller_id;
 256         ct.cc_flags = CC_DONTBLOCK;
 257 
 258         /*
 259          * We need to specially handle size changes because it is
 260          * possible for the client to create a file with modes
 261          * which indicate read-only, but with the file opened for
 262          * writing.  If the client then tries to set the size of
 263          * the file, then the normal access checking done in
 264          * VOP_SETATTR would prevent the client from doing so,
 265          * although it should be legal for it to do so.  To get
 266          * around this, we do the access checking for ourselves
 267          * and then use VOP_SPACE which doesn't do the access
 268          * checking which VOP_SETATTR does. VOP_SPACE can only
 269          * operate on VREG files, let VOP_SETATTR handle the other
 270          * extremely rare cases.
 271          * Also the client should not be allowed to change the
 272          * size of the file if there is a conflicting non-blocking
 273          * mandatory lock in the region the change.
 274          */
 275         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 276                 if (in_crit) {
 277                         u_offset_t offset;
 278                         ssize_t length;
 279 
 280                         if (ava.va_size < bva.va_size) {
 281                                 offset = ava.va_size;
 282                                 length = bva.va_size - ava.va_size;
 283                         } else {
 284                                 offset = bva.va_size;
 285                                 length = ava.va_size - bva.va_size;
 286                         }
 287                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 288                             NULL)) {
 289                                 error = EACCES;
 290                                 goto out;
 291                         }
 292                 }
 293 
 294                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 295                         ava.va_mask &= ~AT_SIZE;
 296                         bf.l_type = F_WRLCK;
 297                         bf.l_whence = 0;
 298                         bf.l_start = (off64_t)ava.va_size;
 299                         bf.l_len = 0;
 300                         bf.l_sysid = 0;
 301                         bf.l_pid = 0;
 302                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 303                             (offset_t)ava.va_size, cr, &ct);
 304                 }
 305         }
 306 
 307         if (!error && ava.va_mask)
 308                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 309 
 310         /* check if a monitor detected a delegation conflict */
 311         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 312                 resp->status = NFS3ERR_JUKEBOX;
 313                 goto out1;
 314         }
 315 
 316         ava.va_mask = AT_ALL;
 317         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 318 
 319         /*
 320          * Force modified metadata out to stable storage.
 321          */
 322         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 323 
 324         if (error)
 325                 goto out;
 326 
 327         if (in_crit)
 328                 nbl_end_crit(vp);
 329 
 330         resp->status = NFS3_OK;
 331         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 332 
 333         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 334             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 335 
 336         VN_RELE(vp);
 337 
 338         return;
 339 
 340 out:
 341         if (curthread->t_flag & T_WOULDBLOCK) {
 342                 curthread->t_flag &= ~T_WOULDBLOCK;
 343                 resp->status = NFS3ERR_JUKEBOX;
 344         } else
 345                 resp->status = puterrno3(error);
 346 out1:
 347         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 348             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 349 
 350         if (vp != NULL) {
 351                 if (in_crit)
 352                         nbl_end_crit(vp);
 353                 VN_RELE(vp);
 354         }
 355         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 356 }
 357 
 358 void *
 359 rfs3_setattr_getfh(SETATTR3args *args)
 360 {
 361 
 362         return (&args->object);
 363 }
 364 
 365 /* ARGSUSED */
 366 void
 367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 368     struct svc_req *req, cred_t *cr, bool_t ro)
 369 {
 370         int error;
 371         vnode_t *vp;
 372         vnode_t *dvp;
 373         struct vattr *vap;
 374         struct vattr va;
 375         struct vattr *dvap;
 376         struct vattr dva;
 377         nfs_fh3 *fhp;
 378         struct sec_ol sec = {0, 0};
 379         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 380         struct sockaddr *ca;
 381         char *name = NULL;
 382 
 383         dvap = NULL;
 384 
 385         if (exi != NULL)
 386                 exi_hold(exi);
 387 
 388         /*
 389          * Allow lookups from the root - the default
 390          * location of the public filehandle.
 391          */
 392         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 393                 dvp = rootdir;
 394                 VN_HOLD(dvp);
 395 
 396                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 397                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 398         } else {
 399                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 400 
 401                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 402                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 403 
 404                 if (dvp == NULL) {
 405                         error = ESTALE;
 406                         goto out;
 407                 }
 408         }
 409 
 410         dva.va_mask = AT_ALL;
 411         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 412 
 413         if (args->what.name == nfs3nametoolong) {
 414                 resp->status = NFS3ERR_NAMETOOLONG;
 415                 goto out1;
 416         }
 417 
 418         if (args->what.name == NULL || *(args->what.name) == '\0') {
 419                 resp->status = NFS3ERR_ACCES;
 420                 goto out1;
 421         }
 422 
 423         fhp = &args->what.dir;
 424         if (strcmp(args->what.name, "..") == 0 &&
 425             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 426                 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 427                     (dvp->v_flag & VROOT)) {
 428                         /*
 429                          * special case for ".." and 'nohide'exported root
 430                          */
 431                         if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 432                                 resp->status = NFS3ERR_ACCES;
 433                                 goto out1;
 434                         }
 435                 } else {
 436                         resp->status = NFS3ERR_NOENT;
 437                         goto out1;
 438                 }
 439         }
 440 
 441         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 442         name = nfscmd_convname(ca, exi, args->what.name,
 443             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 444 
 445         if (name == NULL) {
 446                 resp->status = NFS3ERR_ACCES;
 447                 goto out1;
 448         }
 449 
 450         /*
 451          * If the public filehandle is used then allow
 452          * a multi-component lookup
 453          */
 454         if (PUBLIC_FH3(&args->what.dir)) {
 455                 publicfh_flag = TRUE;
 456 
 457                 exi_rele(exi);
 458 
 459                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 460                     &exi, &sec);
 461 
 462                 /*
 463                  * Since WebNFS may bypass MOUNT, we need to ensure this
 464                  * request didn't come from an unlabeled admin_low client.
 465                  */
 466                 if (is_system_labeled() && error == 0) {
 467                         int             addr_type;
 468                         void            *ipaddr;
 469                         tsol_tpc_t      *tp;
 470 
 471                         if (ca->sa_family == AF_INET) {
 472                                 addr_type = IPV4_VERSION;
 473                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 474                         } else if (ca->sa_family == AF_INET6) {
 475                                 addr_type = IPV6_VERSION;
 476                                 ipaddr = &((struct sockaddr_in6 *)
 477                                     ca)->sin6_addr;
 478                         }
 479                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 480                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 481                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 482                             SUN_CIPSO) {
 483                                 VN_RELE(vp);
 484                                 error = EACCES;
 485                         }
 486                         if (tp != NULL)
 487                                 TPC_RELE(tp);
 488                 }
 489         } else {
 490                 error = VOP_LOOKUP(dvp, name, &vp,
 491                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 492         }
 493 
 494         if (name != args->what.name)
 495                 kmem_free(name, MAXPATHLEN + 1);
 496 
 497         if (error == 0 && vn_ismntpt(vp)) {
 498                 error = rfs_cross_mnt(&vp, &exi);
 499                 if (error)
 500                         VN_RELE(vp);
 501         }
 502 
 503         if (is_system_labeled() && error == 0) {
 504                 bslabel_t *clabel = req->rq_label;
 505 
 506                 ASSERT(clabel != NULL);
 507                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 508                     "got client label from request(1)", struct svc_req *, req);
 509 
 510                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 511                         if (!do_rfs_label_check(clabel, dvp,
 512                             DOMINANCE_CHECK, exi)) {
 513                                 VN_RELE(vp);
 514                                 error = EACCES;
 515                         }
 516                 }
 517         }
 518 
 519         dva.va_mask = AT_ALL;
 520         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 521 
 522         if (error)
 523                 goto out;
 524 
 525         if (sec.sec_flags & SEC_QUERY) {
 526                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 527         } else {
 528                 error = makefh3(&resp->resok.object, vp, exi);
 529                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 530                         auth_weak = TRUE;
 531         }
 532 
 533         if (error) {
 534                 VN_RELE(vp);
 535                 goto out;
 536         }
 537 
 538         va.va_mask = AT_ALL;
 539         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 540 
 541         exi_rele(exi);
 542         VN_RELE(vp);
 543 
 544         resp->status = NFS3_OK;
 545         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 546         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 547 
 548         /*
 549          * If it's public fh, no 0x81, and client's flavor is
 550          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 551          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 552          */
 553         if (auth_weak)
 554                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 555 
 556         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 557             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 558         VN_RELE(dvp);
 559 
 560         return;
 561 
 562 out:
 563         if (curthread->t_flag & T_WOULDBLOCK) {
 564                 curthread->t_flag &= ~T_WOULDBLOCK;
 565                 resp->status = NFS3ERR_JUKEBOX;
 566         } else
 567                 resp->status = puterrno3(error);
 568 out1:
 569         if (exi != NULL)
 570                 exi_rele(exi);
 571 
 572         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 573             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 574 
 575         if (dvp != NULL)
 576                 VN_RELE(dvp);
 577         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 578 
 579 }
 580 
 581 void *
 582 rfs3_lookup_getfh(LOOKUP3args *args)
 583 {
 584 
 585         return (&args->what.dir);
 586 }
 587 
 588 /* ARGSUSED */
 589 void
 590 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 591     struct svc_req *req, cred_t *cr, bool_t ro)
 592 {
 593         int error;
 594         vnode_t *vp;
 595         struct vattr *vap;
 596         struct vattr va;
 597         int checkwriteperm;
 598         boolean_t dominant_label = B_FALSE;
 599         boolean_t equal_label = B_FALSE;
 600         boolean_t admin_low_client;
 601 
 602         vap = NULL;
 603 
 604         vp = nfs3_fhtovp(&args->object, exi);
 605 
 606         DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
 607             cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
 608 
 609         if (vp == NULL) {
 610                 error = ESTALE;
 611                 goto out;
 612         }
 613 
 614         /*
 615          * If the file system is exported read only, it is not appropriate
 616          * to check write permissions for regular files and directories.
 617          * Special files are interpreted by the client, so the underlying
 618          * permissions are sent back to the client for interpretation.
 619          */
 620         if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
 621                 checkwriteperm = 0;
 622         else
 623                 checkwriteperm = 1;
 624 
 625         /*
 626          * We need the mode so that we can correctly determine access
 627          * permissions relative to a mandatory lock file.  Access to
 628          * mandatory lock files is denied on the server, so it might
 629          * as well be reflected to the server during the open.
 630          */
 631         va.va_mask = AT_MODE;
 632         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 633         if (error)
 634                 goto out;
 635 
 636         vap = &va;
 637 
 638         resp->resok.access = 0;
 639 
 640         if (is_system_labeled()) {
 641                 bslabel_t *clabel = req->rq_label;
 642 
 643                 ASSERT(clabel != NULL);
 644                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 645                     "got client label from request(1)", struct svc_req *, req);
 646 
 647                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 648                         if ((equal_label = do_rfs_label_check(clabel, vp,
 649                             EQUALITY_CHECK, exi)) == B_FALSE) {
 650                                 dominant_label = do_rfs_label_check(clabel,
 651                                     vp, DOMINANCE_CHECK, exi);
 652                         } else
 653                                 dominant_label = B_TRUE;
 654                         admin_low_client = B_FALSE;
 655                 } else
 656                         admin_low_client = B_TRUE;
 657         }
 658 
 659         if (args->access & ACCESS3_READ) {
 660                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 661                 if (error) {
 662                         if (curthread->t_flag & T_WOULDBLOCK)
 663                                 goto out;
 664                 } else if (!MANDLOCK(vp, va.va_mode) &&
 665                     (!is_system_labeled() || admin_low_client ||
 666                     dominant_label))
 667                         resp->resok.access |= ACCESS3_READ;
 668         }
 669         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 670                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 671                 if (error) {
 672                         if (curthread->t_flag & T_WOULDBLOCK)
 673                                 goto out;
 674                 } else if (!is_system_labeled() || admin_low_client ||
 675                     dominant_label)
 676                         resp->resok.access |= ACCESS3_LOOKUP;
 677         }
 678         if (checkwriteperm &&
 679             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 680                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 681                 if (error) {
 682                         if (curthread->t_flag & T_WOULDBLOCK)
 683                                 goto out;
 684                 } else if (!MANDLOCK(vp, va.va_mode) &&
 685                     (!is_system_labeled() || admin_low_client || equal_label)) {
 686                         resp->resok.access |=
 687                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 688                 }
 689         }
 690         if (checkwriteperm &&
 691             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 692                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 693                 if (error) {
 694                         if (curthread->t_flag & T_WOULDBLOCK)
 695                                 goto out;
 696                 } else if (!is_system_labeled() || admin_low_client ||
 697                     equal_label)
 698                         resp->resok.access |= ACCESS3_DELETE;
 699         }
 700         if (args->access & ACCESS3_EXECUTE) {
 701                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 702                 if (error) {
 703                         if (curthread->t_flag & T_WOULDBLOCK)
 704                                 goto out;
 705                 } else if (!MANDLOCK(vp, va.va_mode) &&
 706                     (!is_system_labeled() || admin_low_client ||
 707                     dominant_label))
 708                         resp->resok.access |= ACCESS3_EXECUTE;
 709         }
 710 
 711         va.va_mask = AT_ALL;
 712         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 713 
 714         resp->status = NFS3_OK;
 715         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 716 
 717         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 718             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 719 
 720         VN_RELE(vp);
 721 
 722         return;
 723 
 724 out:
 725         if (curthread->t_flag & T_WOULDBLOCK) {
 726                 curthread->t_flag &= ~T_WOULDBLOCK;
 727                 resp->status = NFS3ERR_JUKEBOX;
 728         } else
 729                 resp->status = puterrno3(error);
 730         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 731             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 732         if (vp != NULL)
 733                 VN_RELE(vp);
 734         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 735 }
 736 
 737 void *
 738 rfs3_access_getfh(ACCESS3args *args)
 739 {
 740 
 741         return (&args->object);
 742 }
 743 
 744 /* ARGSUSED */
 745 void
 746 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 747     struct svc_req *req, cred_t *cr, bool_t ro)
 748 {
 749         int error;
 750         vnode_t *vp;
 751         struct vattr *vap;
 752         struct vattr va;
 753         struct iovec iov;
 754         struct uio uio;
 755         char *data;
 756         struct sockaddr *ca;
 757         char *name = NULL;
 758         int is_referral = 0;
 759 
 760         vap = NULL;
 761 
 762         vp = nfs3_fhtovp(&args->symlink, exi);
 763 
 764         DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
 765             cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
 766 
 767         if (vp == NULL) {
 768                 error = ESTALE;
 769                 goto out;
 770         }
 771 
 772         va.va_mask = AT_ALL;
 773         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 774         if (error)
 775                 goto out;
 776 
 777         vap = &va;
 778 
 779         /* We lied about the object type for a referral */
 780         if (vn_is_nfs_reparse(vp, cr))
 781                 is_referral = 1;
 782 
 783         if (vp->v_type != VLNK && !is_referral) {
 784                 resp->status = NFS3ERR_INVAL;
 785                 goto out1;
 786         }
 787 
 788         if (MANDLOCK(vp, va.va_mode)) {
 789                 resp->status = NFS3ERR_ACCES;
 790                 goto out1;
 791         }
 792 
 793         if (is_system_labeled()) {
 794                 bslabel_t *clabel = req->rq_label;
 795 
 796                 ASSERT(clabel != NULL);
 797                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 798                     "got client label from request(1)", struct svc_req *, req);
 799 
 800                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 801                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 802                             exi)) {
 803                                 resp->status = NFS3ERR_ACCES;
 804                                 goto out1;
 805                         }
 806                 }
 807         }
 808 
 809         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 810 
 811         if (is_referral) {
 812                 char *s;
 813                 size_t strsz;
 814 
 815                 /* Get an artificial symlink based on a referral */
 816                 s = build_symlink(vp, cr, &strsz);
 817                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 818                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 819                     vnode_t *, vp, char *, s);
 820                 if (s == NULL)
 821                         error = EINVAL;
 822                 else {
 823                         error = 0;
 824                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 825                         kmem_free(s, strsz);
 826                 }
 827 
 828         } else {
 829 
 830                 iov.iov_base = data;
 831                 iov.iov_len = MAXPATHLEN;
 832                 uio.uio_iov = &iov;
 833                 uio.uio_iovcnt = 1;
 834                 uio.uio_segflg = UIO_SYSSPACE;
 835                 uio.uio_extflg = UIO_COPY_CACHED;
 836                 uio.uio_loffset = 0;
 837                 uio.uio_resid = MAXPATHLEN;
 838 
 839                 error = VOP_READLINK(vp, &uio, cr, NULL);
 840 
 841                 if (!error)
 842                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 843         }
 844 
 845         va.va_mask = AT_ALL;
 846         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 847 
 848         /* Lie about object type again just to be consistent */
 849         if (is_referral && vap != NULL)
 850                 vap->va_type = VLNK;
 851 
 852 #if 0 /* notyet */
 853         /*
 854          * Don't do this.  It causes local disk writes when just
 855          * reading the file and the overhead is deemed larger
 856          * than the benefit.
 857          */
 858         /*
 859          * Force modified metadata out to stable storage.
 860          */
 861         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 862 #endif
 863 
 864         if (error) {
 865                 kmem_free(data, MAXPATHLEN + 1);
 866                 goto out;
 867         }
 868 
 869         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 870         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 871             MAXPATHLEN + 1);
 872 
 873         if (name == NULL) {
 874                 /*
 875                  * Even though the conversion failed, we return
 876                  * something. We just don't translate it.
 877                  */
 878                 name = data;
 879         }
 880 
 881         resp->status = NFS3_OK;
 882         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 883         resp->resok.data = name;
 884 
 885         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 886             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 887         VN_RELE(vp);
 888 
 889         if (name != data)
 890                 kmem_free(data, MAXPATHLEN + 1);
 891 
 892         return;
 893 
 894 out:
 895         if (curthread->t_flag & T_WOULDBLOCK) {
 896                 curthread->t_flag &= ~T_WOULDBLOCK;
 897                 resp->status = NFS3ERR_JUKEBOX;
 898         } else
 899                 resp->status = puterrno3(error);
 900 out1:
 901         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 902             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 903         if (vp != NULL)
 904                 VN_RELE(vp);
 905         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 906 }
 907 
 908 void *
 909 rfs3_readlink_getfh(READLINK3args *args)
 910 {
 911 
 912         return (&args->symlink);
 913 }
 914 
 915 void
 916 rfs3_readlink_free(READLINK3res *resp)
 917 {
 918 
 919         if (resp->status == NFS3_OK)
 920                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 921 }
 922 
/*
 * Server routine to handle the NFSv3 READ operation.
 * May handle RDMA data as well as mblks.
 *
 * The data read is handed back in one of three ways:
 *   - through the write chunk list supplied in args->wlist (RDMA);
 *   - through zero-copy buffers obtained with VOP_REQZCBUF() when
 *     nfs_loaned_buffers is set and RDMA is not in use;
 *   - through an mblk chain obtained from rfs_read_alloc() otherwise.
 *
 * On success resp->status is NFS3_OK and resp->resok is filled in.
 * On failure resp->status holds the NFS3 error and
 * resp->resfail.file_attributes is built from whatever attributes
 * (possibly none) were obtained before the failure.
 */
/* ARGSUSED */
void
rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        int error;
        vnode_t *vp;
        struct vattr *vap;
        struct vattr va;
        struct iovec iov, *iovp = NULL;
        int iovcnt;
        struct uio uio;
        u_offset_t offset;
        mblk_t *mp = NULL;
        int in_crit = 0;                /* set once nbl_start_crit() ran */
        int need_rwunlock = 0;          /* set once VOP_RWLOCK() ran */
        caller_context_t ct;
        int rdma_used = 0;
        int loaned_buffers;
        struct uio *uiop;

        /* No attributes yet; the failure exits report vap as it stands. */
        vap = NULL;

        vp = nfs3_fhtovp(&args->file, exi);

        DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, READ3args *, args);

        if (vp == NULL) {
                error = ESTALE;
                goto out;
        }

        /*
         * A write chunk list means the reply goes out via RDMA.  Refuse
         * a request for more bytes than clist_len() reports for it.
         */
        if (args->wlist) {
                if (args->count > clist_len(args->wlist)) {
                        error = EINVAL;
                        goto out;
                }
                rdma_used = 1;
        }

        /* use loaned buffers for TCP */
        loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                /*
                 * Clients whose label equals admin_low skip the check;
                 * all others must pass the dominance check on vp.
                 */
                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
                            exi)) {
                                resp->status = NFS3ERR_ACCES;
                                goto out1;
                        }
                }
        }

        /*
         * Caller context handed to the VOP calls below.  CC_WOULDBLOCK
         * is tested in cc_flags after VOP_RWLOCK() and VOP_READ() to
         * turn EAGAIN into NFS3ERR_JUKEBOX.
         */
        ct.cc_sysid = 0;
        ct.cc_pid = 0;
        ct.cc_caller_id = nfs3_srv_caller_id;
        ct.cc_flags = CC_DONTBLOCK;

        /*
         * Enter the critical region before calling VOP_RWLOCK
         * to avoid a deadlock with write requests.
         */
        if (nbl_need_check(vp)) {
                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
                    NULL)) {
                        error = EACCES;
                        goto out;
                }
        }

        error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                /* need_rwunlock is still 0, so out1 will not unlock. */
                resp->status = NFS3ERR_JUKEBOX;
                goto out1;
        }

        /* From here on the failure exits must drop the rwlock. */
        need_rwunlock = 1;

        va.va_mask = AT_ALL;
        error = VOP_GETATTR(vp, &va, 0, cr, &ct);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto out;

        vap = &va;

        /* Only regular files can be read through this routine. */
        if (vp->v_type != VREG) {
                resp->status = NFS3ERR_INVAL;
                goto out1;
        }

        /*
         * The owner of the file is not access-checked.  Anyone else
         * needs VREAD, or failing that VEXEC (presumably so that
         * execute-only files can still be fetched -- confirm).  A
         * pending T_WOULDBLOCK skips the fallback so that the out
         * label can report NFS3ERR_JUKEBOX.
         */
        if (crgetuid(cr) != va.va_uid) {
                error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                        error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
                        if (error)
                                goto out;
                }
        }

        /* MANDLOCK() is true for this file and mode: refuse the read. */
        if (MANDLOCK(vp, va.va_mode)) {
                resp->status = NFS3ERR_ACCES;
                goto out1;
        }

        /*
         * Read starting at or beyond the current file size: reply
         * successfully with no data and eof set.
         */
        offset = args->offset;
        if (offset >= va.va_size) {
                VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                resp->status = NFS3_OK;
                vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
                resp->resok.count = 0;
                resp->resok.eof = TRUE;
                resp->resok.data.data_len = 0;
                resp->resok.data.data_val = NULL;
                resp->resok.data.mp = NULL;
                /* RDMA */
                resp->resok.wlist = args->wlist;
                resp->resok.wlist_len = resp->resok.count;
                if (resp->resok.wlist)
                        clist_zero_len(resp->resok.wlist);
                goto done;
        }

        /*
         * Zero-length read inside the file: reply successfully with no
         * data and eof clear.
         */
        if (args->count == 0) {
                VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                resp->status = NFS3_OK;
                vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
                resp->resok.count = 0;
                resp->resok.eof = FALSE;
                resp->resok.data.data_len = 0;
                resp->resok.data.data_val = NULL;
                resp->resok.data.mp = NULL;
                /* RDMA */
                resp->resok.wlist = args->wlist;
                resp->resok.wlist_len = resp->resok.count;
                if (resp->resok.wlist)
                        clist_zero_len(resp->resok.wlist);
                goto done;
        }

        /*
         * Do not allocate more memory than the maximum allowed
         * transfer size.
         */
        if (args->count > rfs3_tsize(req))
                args->count = rfs3_tsize(req);

        /*
         * Zero-copy attempt: ask the file system for loaned buffers.
         * If that fails, release the xuio and fall back to the
         * ordinary mblk path below.
         */
        if (loaned_buffers) {
                uiop = (uio_t *)rfs_setup_xuio(vp);
                ASSERT(uiop != NULL);
                uiop->uio_segflg = UIO_SYSSPACE;
                uiop->uio_loffset = args->offset;
                uiop->uio_resid = args->count;

                /* Jump to do the read if successful */
                if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
                        /*
                         * Need to hold the vnode until after VOP_RETZCBUF()
                         * is called.
                         */
                        VN_HOLD(vp);
                        goto doio_read;
                }

                DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
                    uiop->uio_loffset, int, uiop->uio_resid);

                uiop->uio_extflg = 0;
                /* failure to setup for zero copy */
                rfs_free_xuio((void *)uiop);
                loaned_buffers = 0;
        }

        /*
         * If returning data via RDMA Write, then grab the chunk list.
         * If we aren't returning READ data w/RDMA_WRITE, then grab
         * a mblk.
         */
        if (rdma_used) {
                (void) rdma_get_wchunk(req, &iov, args->wlist);
                uio.uio_iov = &iov;
                uio.uio_iovcnt = 1;
        } else {
                /*
                 * mp will contain the data to be sent out in the read reply.
                 * For UDP, this will be freed after the reply has been sent
                 * out by the driver.  For TCP, it will be freed after the last
                 * segment associated with the reply has been ACKed by the
                 * client.
                 */
                mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
                uio.uio_iov = iovp;
                uio.uio_iovcnt = iovcnt;
        }

        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_CACHED;
        uio.uio_loffset = args->offset;
        uio.uio_resid = args->count;
        uiop = &uio;

doio_read:
        error = VOP_READ(vp, uiop, 0, cr, &ct);

        if (error) {
                /*
                 * NOTE(review): when the zero-copy setup above succeeded
                 * (loaned_buffers still set), this path neither calls
                 * rfs_free_xuio() on uiop nor drops the extra VN_HOLD()
                 * taken before the jump to doio_read -- confirm whether
                 * they are released elsewhere.
                 */
                if (mp)
                        freemsg(mp);
                /* check if a monitor detected a delegation conflict */
                if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                        resp->status = NFS3ERR_JUKEBOX;
                        goto out1;
                }
                goto out;
        }

        /* make mblk using zc buffers */
        if (loaned_buffers) {
                mp = uio_to_mblk(uiop);
                ASSERT(mp != NULL);
        }

        /*
         * Refresh the attributes after the read.  A failure here does
         * not fail the request; the reply just goes out without them.
         */
        va.va_mask = AT_ALL;
        error = VOP_GETATTR(vp, &va, 0, cr, &ct);

        if (error)
                vap = NULL;
        else
                vap = &va;

        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);

        if (in_crit)
                nbl_end_crit(vp);

        resp->status = NFS3_OK;
        vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
        /* Bytes transferred is what was asked for minus what is left. */
        resp->resok.count = args->count - uiop->uio_resid;
        /* eof only if fresh attributes show the read ended at the size. */
        if (!error && offset + resp->resok.count == va.va_size)
                resp->resok.eof = TRUE;
        else
                resp->resok.eof = FALSE;
        resp->resok.data.data_len = resp->resok.count;

        if (mp)
                rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);

        resp->resok.data.mp = mp;
        resp->resok.size = (uint_t)args->count;

        if (rdma_used) {
                resp->resok.data.data_val = (caddr_t)iov.iov_base;
                /*
                 * A failed RDMA setup only changes the status; control
                 * still falls through to the done label.
                 */
                if (!rdma_setup_read_data3(args, &(resp->resok))) {
                        resp->status = NFS3ERR_INVAL;
                }
        } else {
                /*
                 * NOTE(review): mp is dereferenced without a NULL check
                 * here although it is tested a few lines up; this relies
                 * on rfs_read_alloc()/uio_to_mblk() never returning
                 * NULL -- confirm.
                 */
                resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
                (resp->resok).wlist = NULL;
        }

done:
        /* Success exit: locks were already dropped by whoever got here. */
        DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, READ3res *, resp);

        VN_RELE(vp);

        /* iovp is only non-NULL when rfs_read_alloc() supplied it. */
        if (iovp != NULL)
                kmem_free(iovp, iovcnt * sizeof (struct iovec));

        return;

out:
        /*
         * Failure exit with an errno in error.  A pending T_WOULDBLOCK
         * on the thread takes precedence and is reported as JUKEBOX.
         */
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
out1:
        /* Failure exit with resp->status already set by the caller. */
        DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, READ3res *, resp);

        if (vp != NULL) {
                if (need_rwunlock)
                        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
        vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);

        if (iovp != NULL)
                kmem_free(iovp, iovcnt * sizeof (struct iovec));
}
1240 
1241 void
1242 rfs3_read_free(READ3res *resp)
1243 {
1244         mblk_t *mp;
1245 
1246         if (resp->status == NFS3_OK) {
1247                 mp = resp->resok.data.mp;
1248                 if (mp != NULL)
1249                         freemsg(mp);
1250         }
1251 }
1252 
1253 void *
1254 rfs3_read_getfh(READ3args *args)
1255 {
1256 
1257         return (&args->file);
1258 }
1259 
/*
 * Size of the iovec array rfs3_write() keeps on its stack.  A request
 * whose mblk chain has more links than this gets a kmem_alloc()ed
 * array instead.
 */
#define MAX_IOVECS      12

#ifdef DEBUG
/*
 * DEBUG-only counters maintained by rfs3_write(): requests whose mblk
 * chain fit in the on-stack iovec array (hits) and requests that
 * needed an allocated array (misses).
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1266 
/*
 * Server routine to handle the NFSv3 WRITE operation.
 *
 * The data to be written is taken from args->mblk (an mblk chain) when
 * that is set, otherwise from the address recorded in args->rlist when
 * that is set, otherwise from args->data.data_val.
 *
 * On success resp->status is NFS3_OK and resp->resok carries the wcc
 * data, the number of bytes written, the requested stability level
 * and the write verifier.  On failure resp->status holds the NFS3
 * error and resp->resfail.file_wcc is built from whatever before/after
 * attributes were obtained.
 */
void
rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        int error;
        vnode_t *vp;
        struct vattr *bvap = NULL;      /* attributes before the write */
        struct vattr bva;
        struct vattr *avap = NULL;      /* attributes after the write */
        struct vattr ava;
        u_offset_t rlimit;
        struct uio uio;
        struct iovec iov[MAX_IOVECS];
        mblk_t *m;
        struct iovec *iovp;
        int iovcnt;
        int ioflag;
        cred_t *savecred;
        int in_crit = 0;                /* set once nbl_start_crit() ran */
        int rwlock_ret = -1;            /* -1: no rwlock to drop at exit */
        caller_context_t ct;

        vp = nfs3_fhtovp(&args->file, exi);

        DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, WRITE3args *, args);

        if (vp == NULL) {
                error = ESTALE;
                goto err;
        }

        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                /*
                 * Clients whose label equals admin_low skip the check;
                 * all others must pass the equality check on vp.
                 */
                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
                            exi)) {
                                resp->status = NFS3ERR_ACCES;
                                goto err1;
                        }
                }
        }

        /*
         * Caller context handed to the VOP calls below.  CC_WOULDBLOCK
         * is tested in cc_flags after VOP_RWLOCK() and VOP_WRITE() to
         * turn EAGAIN into NFS3ERR_JUKEBOX.
         */
        ct.cc_sysid = 0;
        ct.cc_pid = 0;
        ct.cc_caller_id = nfs3_srv_caller_id;
        ct.cc_flags = CC_DONTBLOCK;

        /*
         * We have to enter the critical region before calling VOP_RWLOCK
         * to avoid a deadlock with ufs.
         */
        if (nbl_need_check(vp)) {
                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
                    NULL)) {
                        error = EACCES;
                        goto err;
                }
        }

        rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

        /* check if a monitor detected a delegation conflict */
        if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                /* Reset so that the exit code does not call VOP_RWUNLOCK. */
                rwlock_ret = -1;
                goto err1;
        }


        bva.va_mask = AT_ALL;
        error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto err;

        /*
         * Until the write has happened, the "after" attributes reported
         * on failure are the same as the "before" attributes.
         */
        bvap = &bva;
        avap = bvap;

        /* The stated count must match the length of the data supplied. */
        if (args->count != args->data.data_len) {
                resp->status = NFS3ERR_INVAL;
                goto err1;
        }

        if (rdonly(ro, vp)) {
                resp->status = NFS3ERR_ROFS;
                goto err1;
        }

        /* Only regular files can be written through this routine. */
        if (vp->v_type != VREG) {
                resp->status = NFS3ERR_INVAL;
                goto err1;
        }

        /* The owner of the file is not access-checked; others need VWRITE. */
        if (crgetuid(cr) != bva.va_uid &&
            (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
                goto err;

        /* MANDLOCK() is true for this file and mode: refuse the write. */
        if (MANDLOCK(vp, bva.va_mode)) {
                resp->status = NFS3ERR_ACCES;
                goto err1;
        }

        /*
         * Zero-length write: succeed without touching the file,
         * echoing back the requested stability level.
         */
        if (args->count == 0) {
                resp->status = NFS3_OK;
                vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
                resp->resok.count = 0;
                resp->resok.committed = args->stable;
                resp->resok.verf = write3verf;
                goto out;
        }

        /*
         * Describe the data to write as an iovec array.  An mblk chain
         * gets one iovec per link, using the on-stack array when it is
         * large enough and an allocated one otherwise.  The other two
         * sources are a single contiguous buffer.
         */
        if (args->mblk != NULL) {
                iovcnt = 0;
                for (m = args->mblk; m != NULL; m = m->b_cont)
                        iovcnt++;
                if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
                        rfs3_write_hits++;
#endif
                        iovp = iov;
                } else {
#ifdef DEBUG
                        rfs3_write_misses++;
#endif
                        iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
                }
                mblk_to_iov(args->mblk, iovcnt, iovp);

        } else if (args->rlist != NULL) {
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
                iovp->iov_len = args->count;
        } else {
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = args->data.data_val;
                iovp->iov_len = args->count;
        }

        uio.uio_iov = iovp;
        uio.uio_iovcnt = iovcnt;

        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_DEFAULT;
        uio.uio_loffset = args->offset;
        uio.uio_resid = args->count;
        /*
         * Trim the transfer so that it does not run past the limit
         * taken from curproc->p_fsz_ctl.
         *
         * NOTE(review): if args->offset is already beyond uio_llimit
         * the subtraction below presumably wraps (u_offset_t looks
         * unsigned) and no trimming happens -- confirm that VOP_WRITE
         * enforces the limit itself in that case.
         */
        uio.uio_llimit = curproc->p_fsz_ctl;
        rlimit = uio.uio_llimit - args->offset;
        if (rlimit < (u_offset_t)uio.uio_resid)
                uio.uio_resid = (int)rlimit;

        /*
         * Translate the requested stability level into VOP_WRITE flags.
         * Any other value is rejected; an allocated iovec array must be
         * released first because the exit code does not free it.
         */
        if (args->stable == UNSTABLE)
                ioflag = 0;
        else if (args->stable == FILE_SYNC)
                ioflag = FSYNC;
        else if (args->stable == DATA_SYNC)
                ioflag = FDSYNC;
        else {
                if (iovp != iov)
                        kmem_free(iovp, sizeof (*iovp) * iovcnt);
                resp->status = NFS3ERR_INVAL;
                goto err1;
        }

        /*
         * We're changing creds because VM may fault and we need
         * the cred of the current thread to be used if quota
         * checking is enabled.
         */
        savecred = curthread->t_cred;
        curthread->t_cred = cr;
        error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
        curthread->t_cred = savecred;

        /* The iovec array is no longer needed whatever the outcome. */
        if (iovp != iov)
                kmem_free(iovp, sizeof (*iovp) * iovcnt);

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                goto err1;
        }

        /*
         * Fetch the "after" attributes before looking at the write
         * error so that a failed write still reports them.
         */
        ava.va_mask = AT_ALL;
        avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;

        if (error)
                goto err;

        /*
         * If we were unable to get the V_WRITELOCK_TRUE, then we
         * may not have accurate after attrs, so check if
         * we have both attributes, they have a non-zero va_seq, and
         * va_seq has changed by exactly one,
         * if not, turn off the before attr.
         */
        if (rwlock_ret != V_WRITELOCK_TRUE) {
                if (bvap == NULL || avap == NULL ||
                    bvap->va_seq == 0 || avap->va_seq == 0 ||
                    avap->va_seq != (bvap->va_seq + 1)) {
                        bvap = NULL;
                }
        }

        resp->status = NFS3_OK;
        vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
        /* Bytes written is what was asked for minus what is left. */
        resp->resok.count = args->count - uio.uio_resid;
        resp->resok.committed = args->stable;
        resp->resok.verf = write3verf;
        goto out;

err:
        /*
         * Failure exit with an errno in error.  A pending T_WOULDBLOCK
         * on the thread takes precedence and is reported as JUKEBOX.
         */
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
err1:
        /* Failure exit with resp->status already set by the caller. */
        vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
out:
        /* Common exit for success and failure: undo what was acquired. */
        DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);

        if (vp != NULL) {
                if (rwlock_ret != -1)
                        VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
}
1511 
1512 void *
1513 rfs3_write_getfh(WRITE3args *args)
1514 {
1515 
1516         return (&args->file);
1517 }
1518 
1519 void
1520 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1521     struct svc_req *req, cred_t *cr, bool_t ro)
1522 {
1523         int error;
1524         int in_crit = 0;
1525         vnode_t *vp;
1526         vnode_t *tvp = NULL;
1527         vnode_t *dvp;
1528         struct vattr *vap;
1529         struct vattr va;
1530         struct vattr *dbvap;
1531         struct vattr dbva;
1532         struct vattr *davap;
1533         struct vattr dava;
1534         enum vcexcl excl;
1535         nfstime3 *mtime;
1536         len_t reqsize;
1537         bool_t trunc;
1538         struct sockaddr *ca;
1539         char *name = NULL;
1540 
1541         dbvap = NULL;
1542         davap = NULL;
1543 
1544         dvp = nfs3_fhtovp(&args->where.dir, exi);
1545 
1546         DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1547             cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1548 
1549         if (dvp == NULL) {
1550                 error = ESTALE;
1551                 goto out;
1552         }
1553 
1554         dbva.va_mask = AT_ALL;
1555         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1556         davap = dbvap;
1557 
1558         if (args->where.name == nfs3nametoolong) {
1559                 resp->status = NFS3ERR_NAMETOOLONG;
1560                 goto out1;
1561         }
1562 
1563         if (args->where.name == NULL || *(args->where.name) == '\0') {
1564                 resp->status = NFS3ERR_ACCES;
1565                 goto out1;
1566         }
1567 
1568         if (rdonly(ro, dvp)) {
1569                 resp->status = NFS3ERR_ROFS;
1570                 goto out1;
1571         }
1572 
1573         if (is_system_labeled()) {
1574                 bslabel_t *clabel = req->rq_label;
1575 
1576                 ASSERT(clabel != NULL);
1577                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1578                     "got client label from request(1)", struct svc_req *, req);
1579 
1580                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1581                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1582                             exi)) {
1583                                 resp->status = NFS3ERR_ACCES;
1584                                 goto out1;
1585                         }
1586                 }
1587         }
1588 
1589         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1590         name = nfscmd_convname(ca, exi, args->where.name,
1591             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1592 
1593         if (name == NULL) {
1594                 /* This is really a Solaris EILSEQ */
1595                 resp->status = NFS3ERR_INVAL;
1596                 goto out1;
1597         }
1598 
1599         if (args->how.mode == EXCLUSIVE) {
1600                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1601                 va.va_type = VREG;
1602                 va.va_mode = (mode_t)0;
1603                 /*
1604                  * Ensure no time overflows and that types match
1605                  */
1606                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1607                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1608                 va.va_mtime.tv_nsec = mtime->nseconds;
1609                 excl = EXCL;
1610         } else {
1611                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1612                     &va);
1613                 if (error)
1614                         goto out;
1615                 va.va_mask |= AT_TYPE;
1616                 va.va_type = VREG;
1617                 if (args->how.mode == GUARDED)
1618                         excl = EXCL;
1619                 else {
1620                         excl = NONEXCL;
1621 
1622                         /*
1623                          * During creation of file in non-exclusive mode
1624                          * if size of file is being set then make sure
1625                          * that if the file already exists that no conflicting
1626                          * non-blocking mandatory locks exists in the region
1627                          * being modified. If there are conflicting locks fail
1628                          * the operation with EACCES.
1629                          */
1630                         if (va.va_mask & AT_SIZE) {
1631                                 struct vattr tva;
1632 
1633                                 /*
1634                                  * Does file already exist?
1635                                  */
1636                                 error = VOP_LOOKUP(dvp, name, &tvp,
1637                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1638 
1639                                 /*
1640                                  * Check to see if the file has been delegated
1641                                  * to a v4 client.  If so, then begin recall of
1642                                  * the delegation and return JUKEBOX to allow
                                 * the client to retransmit its request.
1644                                  */
1645 
1646                                 trunc = va.va_size == 0;
1647                                 if (!error &&
1648                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1649                                         resp->status = NFS3ERR_JUKEBOX;
1650                                         goto out1;
1651                                 }
1652 
1653                                 /*
1654                                  * Check for NBMAND lock conflicts
1655                                  */
1656                                 if (!error && nbl_need_check(tvp)) {
1657                                         u_offset_t offset;
1658                                         ssize_t len;
1659 
1660                                         nbl_start_crit(tvp, RW_READER);
1661                                         in_crit = 1;
1662 
1663                                         tva.va_mask = AT_SIZE;
1664                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1665                                             NULL);
1666                                         /*
1667                                          * Can't check for conflicts, so return
1668                                          * error.
1669                                          */
1670                                         if (error)
1671                                                 goto out;
1672 
1673                                         offset = tva.va_size < va.va_size ?
1674                                             tva.va_size : va.va_size;
1675                                         len = tva.va_size < va.va_size ?
1676                                             va.va_size - tva.va_size :
1677                                             tva.va_size - va.va_size;
1678                                         if (nbl_conflict(tvp, NBL_WRITE,
1679                                             offset, len, 0, NULL)) {
1680                                                 error = EACCES;
1681                                                 goto out;
1682                                         }
1683                                 } else if (tvp) {
1684                                         VN_RELE(tvp);
1685                                         tvp = NULL;
1686                                 }
1687                         }
1688                 }
1689                 if (va.va_mask & AT_SIZE)
1690                         reqsize = va.va_size;
1691         }
1692 
1693         /*
1694          * Must specify the mode.
1695          */
1696         if (!(va.va_mask & AT_MODE)) {
1697                 resp->status = NFS3ERR_INVAL;
1698                 goto out1;
1699         }
1700 
1701         /*
1702          * If the filesystem is exported with nosuid, then mask off
1703          * the setuid and setgid bits.
1704          */
1705         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1706                 va.va_mode &= ~(VSUID | VSGID);
1707 
1708 tryagain:
1709         /*
1710          * The file open mode used is VWRITE.  If the client needs
1711          * some other semantic, then it should do the access checking
1712          * itself.  It would have been nice to have the file open mode
1713          * passed as part of the arguments.
1714          */
1715         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1716             &vp, cr, 0, NULL, NULL);
1717 
1718         dava.va_mask = AT_ALL;
1719         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1720 
1721         if (error) {
1722                 /*
1723                  * If we got something other than file already exists
1724                  * then just return this error.  Otherwise, we got
1725                  * EEXIST.  If we were doing a GUARDED create, then
1726                  * just return this error.  Otherwise, we need to
1727                  * make sure that this wasn't a duplicate of an
1728                  * exclusive create request.
1729                  *
1730                  * The assumption is made that a non-exclusive create
1731                  * request will never return EEXIST.
1732                  */
1733                 if (error != EEXIST || args->how.mode == GUARDED)
1734                         goto out;
1735                 /*
1736                  * Lookup the file so that we can get a vnode for it.
1737                  */
1738                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1739                     NULL, cr, NULL, NULL, NULL);
1740                 if (error) {
1741                         /*
1742                          * We couldn't find the file that we thought that
1743                          * we just created.  So, we'll just try creating
1744                          * it again.
1745                          */
1746                         if (error == ENOENT)
1747                                 goto tryagain;
1748                         goto out;
1749                 }
1750 
1751                 /*
1752                  * If the file is delegated to a v4 client, go ahead
1753                  * and initiate recall, this create is a hint that a
1754                  * conflicting v3 open has occurred.
1755                  */
1756 
1757                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1758                         VN_RELE(vp);
1759                         resp->status = NFS3ERR_JUKEBOX;
1760                         goto out1;
1761                 }
1762 
1763                 va.va_mask = AT_ALL;
1764                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1765 
1766                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1767                 /* % with INT32_MAX to prevent overflows */
1768                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1769                     vap->va_mtime.tv_sec !=
1770                     (mtime->seconds % INT32_MAX) ||
1771                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1772                         VN_RELE(vp);
1773                         error = EEXIST;
1774                         goto out;
1775                 }
1776         } else {
1777 
1778                 if ((args->how.mode == UNCHECKED ||
1779                     args->how.mode == GUARDED) &&
1780                     args->how.createhow3_u.obj_attributes.size.set_it &&
1781                     va.va_size == 0)
1782                         trunc = TRUE;
1783                 else
1784                         trunc = FALSE;
1785 
1786                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1787                         VN_RELE(vp);
1788                         resp->status = NFS3ERR_JUKEBOX;
1789                         goto out1;
1790                 }
1791 
1792                 va.va_mask = AT_ALL;
1793                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1794 
1795                 /*
1796                  * We need to check to make sure that the file got
1797                  * created to the indicated size.  If not, we do a
1798                  * setattr to try to change the size, but we don't
1799                  * try too hard.  This shouldn't be a problem as most
1800                  * clients will only specify a size of zero which
1801                  * local file systems handle.  However, even if
1802                  * the client does specify a non-zero size, it can
1803                  * still recover by checking the size of the file
1804                  * after it has created it and then issue a setattr
1805                  * request of its own to set the size of the file.
1806                  */
1807                 if (vap != NULL &&
1808                     (args->how.mode == UNCHECKED ||
1809                     args->how.mode == GUARDED) &&
1810                     args->how.createhow3_u.obj_attributes.size.set_it &&
1811                     vap->va_size != reqsize) {
1812                         va.va_mask = AT_SIZE;
1813                         va.va_size = reqsize;
1814                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1815                         va.va_mask = AT_ALL;
1816                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1817                 }
1818         }
1819 
1820         if (name != args->where.name)
1821                 kmem_free(name, MAXPATHLEN + 1);
1822 
1823         error = makefh3(&resp->resok.obj.handle, vp, exi);
1824         if (error)
1825                 resp->resok.obj.handle_follows = FALSE;
1826         else
1827                 resp->resok.obj.handle_follows = TRUE;
1828 
1829         /*
1830          * Force modified data and metadata out to stable storage.
1831          */
1832         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1833         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1834 
1835         VN_RELE(vp);
1836         if (tvp != NULL) {
1837                 if (in_crit)
1838                         nbl_end_crit(tvp);
1839                 VN_RELE(tvp);
1840         }
1841 
1842         resp->status = NFS3_OK;
1843         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1844         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1845 
1846         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1847             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1848 
1849         VN_RELE(dvp);
1850         return;
1851 
1852 out:
1853         if (curthread->t_flag & T_WOULDBLOCK) {
1854                 curthread->t_flag &= ~T_WOULDBLOCK;
1855                 resp->status = NFS3ERR_JUKEBOX;
1856         } else
1857                 resp->status = puterrno3(error);
1858 out1:
1859         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1860             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1861 
1862         if (name != NULL && name != args->where.name)
1863                 kmem_free(name, MAXPATHLEN + 1);
1864 
1865         if (tvp != NULL) {
1866                 if (in_crit)
1867                         nbl_end_crit(tvp);
1868                 VN_RELE(tvp);
1869         }
1870         if (dvp != NULL)
1871                 VN_RELE(dvp);
1872         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1873 }
1874 
1875 void *
1876 rfs3_create_getfh(CREATE3args *args)
1877 {
1878 
1879         return (&args->where.dir);
1880 }
1881 
1882 void
1883 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1884     struct svc_req *req, cred_t *cr, bool_t ro)
1885 {
1886         int error;
1887         vnode_t *vp = NULL;
1888         vnode_t *dvp;
1889         struct vattr *vap;
1890         struct vattr va;
1891         struct vattr *dbvap;
1892         struct vattr dbva;
1893         struct vattr *davap;
1894         struct vattr dava;
1895         struct sockaddr *ca;
1896         char *name = NULL;
1897 
1898         dbvap = NULL;
1899         davap = NULL;
1900 
1901         dvp = nfs3_fhtovp(&args->where.dir, exi);
1902 
1903         DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1904             cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1905 
1906         if (dvp == NULL) {
1907                 error = ESTALE;
1908                 goto out;
1909         }
1910 
1911         dbva.va_mask = AT_ALL;
1912         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1913         davap = dbvap;
1914 
1915         if (args->where.name == nfs3nametoolong) {
1916                 resp->status = NFS3ERR_NAMETOOLONG;
1917                 goto out1;
1918         }
1919 
1920         if (args->where.name == NULL || *(args->where.name) == '\0') {
1921                 resp->status = NFS3ERR_ACCES;
1922                 goto out1;
1923         }
1924 
1925         if (rdonly(ro, dvp)) {
1926                 resp->status = NFS3ERR_ROFS;
1927                 goto out1;
1928         }
1929 
1930         if (is_system_labeled()) {
1931                 bslabel_t *clabel = req->rq_label;
1932 
1933                 ASSERT(clabel != NULL);
1934                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1935                     "got client label from request(1)", struct svc_req *, req);
1936 
1937                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1938                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1939                             exi)) {
1940                                 resp->status = NFS3ERR_ACCES;
1941                                 goto out1;
1942                         }
1943                 }
1944         }
1945 
1946         error = sattr3_to_vattr(&args->attributes, &va);
1947         if (error)
1948                 goto out;
1949 
1950         if (!(va.va_mask & AT_MODE)) {
1951                 resp->status = NFS3ERR_INVAL;
1952                 goto out1;
1953         }
1954 
1955         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1956         name = nfscmd_convname(ca, exi, args->where.name,
1957             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1958 
1959         if (name == NULL) {
1960                 resp->status = NFS3ERR_INVAL;
1961                 goto out1;
1962         }
1963 
1964         va.va_mask |= AT_TYPE;
1965         va.va_type = VDIR;
1966 
1967         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1968 
1969         if (name != args->where.name)
1970                 kmem_free(name, MAXPATHLEN + 1);
1971 
1972         dava.va_mask = AT_ALL;
1973         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1974 
1975         /*
1976          * Force modified data and metadata out to stable storage.
1977          */
1978         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1979 
1980         if (error)
1981                 goto out;
1982 
1983         error = makefh3(&resp->resok.obj.handle, vp, exi);
1984         if (error)
1985                 resp->resok.obj.handle_follows = FALSE;
1986         else
1987                 resp->resok.obj.handle_follows = TRUE;
1988 
1989         va.va_mask = AT_ALL;
1990         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1991 
1992         /*
1993          * Force modified data and metadata out to stable storage.
1994          */
1995         (void) VOP_FSYNC(vp, 0, cr, NULL);
1996 
1997         VN_RELE(vp);
1998 
1999         resp->status = NFS3_OK;
2000         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2001         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2002 
2003         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2004             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2005         VN_RELE(dvp);
2006 
2007         return;
2008 
2009 out:
2010         if (curthread->t_flag & T_WOULDBLOCK) {
2011                 curthread->t_flag &= ~T_WOULDBLOCK;
2012                 resp->status = NFS3ERR_JUKEBOX;
2013         } else
2014                 resp->status = puterrno3(error);
2015 out1:
2016         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2017             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2018         if (dvp != NULL)
2019                 VN_RELE(dvp);
2020         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2021 }
2022 
2023 void *
2024 rfs3_mkdir_getfh(MKDIR3args *args)
2025 {
2026 
2027         return (&args->where.dir);
2028 }
2029 
2030 void
2031 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2032     struct svc_req *req, cred_t *cr, bool_t ro)
2033 {
2034         int error;
2035         vnode_t *vp;
2036         vnode_t *dvp;
2037         struct vattr *vap;
2038         struct vattr va;
2039         struct vattr *dbvap;
2040         struct vattr dbva;
2041         struct vattr *davap;
2042         struct vattr dava;
2043         struct sockaddr *ca;
2044         char *name = NULL;
2045         char *symdata = NULL;
2046 
2047         dbvap = NULL;
2048         davap = NULL;
2049 
2050         dvp = nfs3_fhtovp(&args->where.dir, exi);
2051 
2052         DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2053             cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2054 
2055         if (dvp == NULL) {
2056                 error = ESTALE;
2057                 goto err;
2058         }
2059 
2060         dbva.va_mask = AT_ALL;
2061         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2062         davap = dbvap;
2063 
2064         if (args->where.name == nfs3nametoolong) {
2065                 resp->status = NFS3ERR_NAMETOOLONG;
2066                 goto err1;
2067         }
2068 
2069         if (args->where.name == NULL || *(args->where.name) == '\0') {
2070                 resp->status = NFS3ERR_ACCES;
2071                 goto err1;
2072         }
2073 
2074         if (rdonly(ro, dvp)) {
2075                 resp->status = NFS3ERR_ROFS;
2076                 goto err1;
2077         }
2078 
2079         if (is_system_labeled()) {
2080                 bslabel_t *clabel = req->rq_label;
2081 
2082                 ASSERT(clabel != NULL);
2083                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2084                     "got client label from request(1)", struct svc_req *, req);
2085 
2086                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2087                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2088                             exi)) {
2089                                 resp->status = NFS3ERR_ACCES;
2090                                 goto err1;
2091                         }
2092                 }
2093         }
2094 
2095         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2096         if (error)
2097                 goto err;
2098 
2099         if (!(va.va_mask & AT_MODE)) {
2100                 resp->status = NFS3ERR_INVAL;
2101                 goto err1;
2102         }
2103 
2104         if (args->symlink.symlink_data == nfs3nametoolong) {
2105                 resp->status = NFS3ERR_NAMETOOLONG;
2106                 goto err1;
2107         }
2108 
2109         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2110         name = nfscmd_convname(ca, exi, args->where.name,
2111             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2112 
2113         if (name == NULL) {
2114                 /* This is really a Solaris EILSEQ */
2115                 resp->status = NFS3ERR_INVAL;
2116                 goto err1;
2117         }
2118 
2119         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2120             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2121         if (symdata == NULL) {
2122                 /* This is really a Solaris EILSEQ */
2123                 resp->status = NFS3ERR_INVAL;
2124                 goto err1;
2125         }
2126 
2127 
2128         va.va_mask |= AT_TYPE;
2129         va.va_type = VLNK;
2130 
2131         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2132 
2133         dava.va_mask = AT_ALL;
2134         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2135 
2136         if (error)
2137                 goto err;
2138 
2139         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2140             NULL, NULL, NULL);
2141 
2142         /*
2143          * Force modified data and metadata out to stable storage.
2144          */
2145         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2146 
2147 
2148         resp->status = NFS3_OK;
2149         if (error) {
2150                 resp->resok.obj.handle_follows = FALSE;
2151                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2152                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2153                 goto out;
2154         }
2155 
2156         error = makefh3(&resp->resok.obj.handle, vp, exi);
2157         if (error)
2158                 resp->resok.obj.handle_follows = FALSE;
2159         else
2160                 resp->resok.obj.handle_follows = TRUE;
2161 
2162         va.va_mask = AT_ALL;
2163         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2164 
2165         /*
2166          * Force modified data and metadata out to stable storage.
2167          */
2168         (void) VOP_FSYNC(vp, 0, cr, NULL);
2169 
2170         VN_RELE(vp);
2171 
2172         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2173         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2174         goto out;
2175 
2176 err:
2177         if (curthread->t_flag & T_WOULDBLOCK) {
2178                 curthread->t_flag &= ~T_WOULDBLOCK;
2179                 resp->status = NFS3ERR_JUKEBOX;
2180         } else
2181                 resp->status = puterrno3(error);
2182 err1:
2183         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2184 out:
2185         if (name != NULL && name != args->where.name)
2186                 kmem_free(name, MAXPATHLEN + 1);
2187         if (symdata != NULL && symdata != args->symlink.symlink_data)
2188                 kmem_free(symdata, MAXPATHLEN + 1);
2189 
2190         DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2191             cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2192 
2193         if (dvp != NULL)
2194                 VN_RELE(dvp);
2195 }
2196 
2197 void *
2198 rfs3_symlink_getfh(SYMLINK3args *args)
2199 {
2200 
2201         return (&args->where.dir);
2202 }
2203 
2204 void
2205 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2206     struct svc_req *req, cred_t *cr, bool_t ro)
2207 {
2208         int error;
2209         vnode_t *vp;
2210         vnode_t *realvp;
2211         vnode_t *dvp;
2212         struct vattr *vap;
2213         struct vattr va;
2214         struct vattr *dbvap;
2215         struct vattr dbva;
2216         struct vattr *davap;
2217         struct vattr dava;
2218         int mode;
2219         enum vcexcl excl;
2220         struct sockaddr *ca;
2221         char *name = NULL;
2222 
2223         dbvap = NULL;
2224         davap = NULL;
2225 
2226         dvp = nfs3_fhtovp(&args->where.dir, exi);
2227 
2228         DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2229             cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2230 
2231         if (dvp == NULL) {
2232                 error = ESTALE;
2233                 goto out;
2234         }
2235 
2236         dbva.va_mask = AT_ALL;
2237         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2238         davap = dbvap;
2239 
2240         if (args->where.name == nfs3nametoolong) {
2241                 resp->status = NFS3ERR_NAMETOOLONG;
2242                 goto out1;
2243         }
2244 
2245         if (args->where.name == NULL || *(args->where.name) == '\0') {
2246                 resp->status = NFS3ERR_ACCES;
2247                 goto out1;
2248         }
2249 
2250         if (rdonly(ro, dvp)) {
2251                 resp->status = NFS3ERR_ROFS;
2252                 goto out1;
2253         }
2254 
2255         if (is_system_labeled()) {
2256                 bslabel_t *clabel = req->rq_label;
2257 
2258                 ASSERT(clabel != NULL);
2259                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2260                     "got client label from request(1)", struct svc_req *, req);
2261 
2262                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2263                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2264                             exi)) {
2265                                 resp->status = NFS3ERR_ACCES;
2266                                 goto out1;
2267                         }
2268                 }
2269         }
2270 
2271         switch (args->what.type) {
2272         case NF3CHR:
2273         case NF3BLK:
2274                 error = sattr3_to_vattr(
2275                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2276                 if (error)
2277                         goto out;
2278                 if (secpolicy_sys_devices(cr) != 0) {
2279                         resp->status = NFS3ERR_PERM;
2280                         goto out1;
2281                 }
2282                 if (args->what.type == NF3CHR)
2283                         va.va_type = VCHR;
2284                 else
2285                         va.va_type = VBLK;
2286                 va.va_rdev = makedevice(
2287                     args->what.mknoddata3_u.device.spec.specdata1,
2288                     args->what.mknoddata3_u.device.spec.specdata2);
2289                 va.va_mask |= AT_TYPE | AT_RDEV;
2290                 break;
2291         case NF3SOCK:
2292                 error = sattr3_to_vattr(
2293                     &args->what.mknoddata3_u.pipe_attributes, &va);
2294                 if (error)
2295                         goto out;
2296                 va.va_type = VSOCK;
2297                 va.va_mask |= AT_TYPE;
2298                 break;
2299         case NF3FIFO:
2300                 error = sattr3_to_vattr(
2301                     &args->what.mknoddata3_u.pipe_attributes, &va);
2302                 if (error)
2303                         goto out;
2304                 va.va_type = VFIFO;
2305                 va.va_mask |= AT_TYPE;
2306                 break;
2307         default:
2308                 resp->status = NFS3ERR_BADTYPE;
2309                 goto out1;
2310         }
2311 
2312         /*
2313          * Must specify the mode.
2314          */
2315         if (!(va.va_mask & AT_MODE)) {
2316                 resp->status = NFS3ERR_INVAL;
2317                 goto out1;
2318         }
2319 
2320         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2321         name = nfscmd_convname(ca, exi, args->where.name,
2322             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2323 
2324         if (name == NULL) {
2325                 resp->status = NFS3ERR_INVAL;
2326                 goto out1;
2327         }
2328 
2329         excl = EXCL;
2330 
2331         mode = 0;
2332 
2333         error = VOP_CREATE(dvp, name, &va, excl, mode,
2334             &vp, cr, 0, NULL, NULL);
2335 
2336         if (name != args->where.name)
2337                 kmem_free(name, MAXPATHLEN + 1);
2338 
2339         dava.va_mask = AT_ALL;
2340         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2341 
2342         /*
2343          * Force modified data and metadata out to stable storage.
2344          */
2345         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2346 
2347         if (error)
2348                 goto out;
2349 
2350         resp->status = NFS3_OK;
2351 
2352         error = makefh3(&resp->resok.obj.handle, vp, exi);
2353         if (error)
2354                 resp->resok.obj.handle_follows = FALSE;
2355         else
2356                 resp->resok.obj.handle_follows = TRUE;
2357 
2358         va.va_mask = AT_ALL;
2359         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2360 
2361         /*
2362          * Force modified metadata out to stable storage.
2363          *
2364          * if a underlying vp exists, pass it to VOP_FSYNC
2365          */
2366         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2367                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2368         else
2369                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2370 
2371         VN_RELE(vp);
2372 
2373         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2374         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2375         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2376             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2377         VN_RELE(dvp);
2378         return;
2379 
2380 out:
2381         if (curthread->t_flag & T_WOULDBLOCK) {
2382                 curthread->t_flag &= ~T_WOULDBLOCK;
2383                 resp->status = NFS3ERR_JUKEBOX;
2384         } else
2385                 resp->status = puterrno3(error);
2386 out1:
2387         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2388             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2389         if (dvp != NULL)
2390                 VN_RELE(dvp);
2391         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2392 }
2393 
2394 void *
2395 rfs3_mknod_getfh(MKNOD3args *args)
2396 {
2397 
2398         return (&args->where.dir);
2399 }
2400 
2401 void
2402 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2403     struct svc_req *req, cred_t *cr, bool_t ro)
2404 {
2405         int error = 0;
2406         vnode_t *vp;
2407         struct vattr *bvap;
2408         struct vattr bva;
2409         struct vattr *avap;
2410         struct vattr ava;
2411         vnode_t *targvp = NULL;
2412         struct sockaddr *ca;
2413         char *name = NULL;
2414 
2415         bvap = NULL;
2416         avap = NULL;
2417 
2418         vp = nfs3_fhtovp(&args->object.dir, exi);
2419 
2420         DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2421             cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2422 
2423         if (vp == NULL) {
2424                 error = ESTALE;
2425                 goto err;
2426         }
2427 
2428         bva.va_mask = AT_ALL;
2429         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2430         avap = bvap;
2431 
2432         if (vp->v_type != VDIR) {
2433                 resp->status = NFS3ERR_NOTDIR;
2434                 goto err1;
2435         }
2436 
2437         if (args->object.name == nfs3nametoolong) {
2438                 resp->status = NFS3ERR_NAMETOOLONG;
2439                 goto err1;
2440         }
2441 
2442         if (args->object.name == NULL || *(args->object.name) == '\0') {
2443                 resp->status = NFS3ERR_ACCES;
2444                 goto err1;
2445         }
2446 
2447         if (rdonly(ro, vp)) {
2448                 resp->status = NFS3ERR_ROFS;
2449                 goto err1;
2450         }
2451 
2452         if (is_system_labeled()) {
2453                 bslabel_t *clabel = req->rq_label;
2454 
2455                 ASSERT(clabel != NULL);
2456                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2457                     "got client label from request(1)", struct svc_req *, req);
2458 
2459                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2460                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2461                             exi)) {
2462                                 resp->status = NFS3ERR_ACCES;
2463                                 goto err1;
2464                         }
2465                 }
2466         }
2467 
2468         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2469         name = nfscmd_convname(ca, exi, args->object.name,
2470             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2471 
2472         if (name == NULL) {
2473                 resp->status = NFS3ERR_INVAL;
2474                 goto err1;
2475         }
2476 
2477         /*
2478          * Check for a conflict with a non-blocking mandatory share
2479          * reservation and V4 delegations
2480          */
2481         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2482             NULL, cr, NULL, NULL, NULL);
2483         if (error != 0)
2484                 goto err;
2485 
2486         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2487                 resp->status = NFS3ERR_JUKEBOX;
2488                 goto err1;
2489         }
2490 
2491         if (!nbl_need_check(targvp)) {
2492                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2493         } else {
2494                 nbl_start_crit(targvp, RW_READER);
2495                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2496                         error = EACCES;
2497                 } else {
2498                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2499                 }
2500                 nbl_end_crit(targvp);
2501         }
2502         VN_RELE(targvp);
2503         targvp = NULL;
2504 
2505         ava.va_mask = AT_ALL;
2506         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2507 
2508         /*
2509          * Force modified data and metadata out to stable storage.
2510          */
2511         (void) VOP_FSYNC(vp, 0, cr, NULL);
2512 
2513         if (error)
2514                 goto err;
2515 
2516         resp->status = NFS3_OK;
2517         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2518         goto out;
2519 
2520 err:
2521         if (curthread->t_flag & T_WOULDBLOCK) {
2522                 curthread->t_flag &= ~T_WOULDBLOCK;
2523                 resp->status = NFS3ERR_JUKEBOX;
2524         } else
2525                 resp->status = puterrno3(error);
2526 err1:
2527         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2528 out:
2529         DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2530             cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2531 
2532         if (name != NULL && name != args->object.name)
2533                 kmem_free(name, MAXPATHLEN + 1);
2534 
2535         if (vp != NULL)
2536                 VN_RELE(vp);
2537 }
2538 
2539 void *
2540 rfs3_remove_getfh(REMOVE3args *args)
2541 {
2542 
2543         return (&args->object.dir);
2544 }
2545 
2546 void
2547 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2548     struct svc_req *req, cred_t *cr, bool_t ro)
2549 {
2550         int error;
2551         vnode_t *vp;
2552         struct vattr *bvap;
2553         struct vattr bva;
2554         struct vattr *avap;
2555         struct vattr ava;
2556         struct sockaddr *ca;
2557         char *name = NULL;
2558 
2559         bvap = NULL;
2560         avap = NULL;
2561 
2562         vp = nfs3_fhtovp(&args->object.dir, exi);
2563 
2564         DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2565             cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2566 
2567         if (vp == NULL) {
2568                 error = ESTALE;
2569                 goto err;
2570         }
2571 
2572         bva.va_mask = AT_ALL;
2573         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2574         avap = bvap;
2575 
2576         if (vp->v_type != VDIR) {
2577                 resp->status = NFS3ERR_NOTDIR;
2578                 goto err1;
2579         }
2580 
2581         if (args->object.name == nfs3nametoolong) {
2582                 resp->status = NFS3ERR_NAMETOOLONG;
2583                 goto err1;
2584         }
2585 
2586         if (args->object.name == NULL || *(args->object.name) == '\0') {
2587                 resp->status = NFS3ERR_ACCES;
2588                 goto err1;
2589         }
2590 
2591         if (rdonly(ro, vp)) {
2592                 resp->status = NFS3ERR_ROFS;
2593                 goto err1;
2594         }
2595 
2596         if (is_system_labeled()) {
2597                 bslabel_t *clabel = req->rq_label;
2598 
2599                 ASSERT(clabel != NULL);
2600                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2601                     "got client label from request(1)", struct svc_req *, req);
2602 
2603                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2604                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2605                             exi)) {
2606                                 resp->status = NFS3ERR_ACCES;
2607                                 goto err1;
2608                         }
2609                 }
2610         }
2611 
2612         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2613         name = nfscmd_convname(ca, exi, args->object.name,
2614             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2615 
2616         if (name == NULL) {
2617                 resp->status = NFS3ERR_INVAL;
2618                 goto err1;
2619         }
2620 
2621         error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2622 
2623         if (name != args->object.name)
2624                 kmem_free(name, MAXPATHLEN + 1);
2625 
2626         ava.va_mask = AT_ALL;
2627         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2628 
2629         /*
2630          * Force modified data and metadata out to stable storage.
2631          */
2632         (void) VOP_FSYNC(vp, 0, cr, NULL);
2633 
2634         if (error) {
2635                 /*
2636                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2637                  * if the directory is not empty.  A System V NFS server
2638                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2639                  * over the wire.
2640                  */
2641                 if (error == EEXIST)
2642                         error = ENOTEMPTY;
2643                 goto err;
2644         }
2645 
2646         resp->status = NFS3_OK;
2647         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2648         goto out;
2649 
2650 err:
2651         if (curthread->t_flag & T_WOULDBLOCK) {
2652                 curthread->t_flag &= ~T_WOULDBLOCK;
2653                 resp->status = NFS3ERR_JUKEBOX;
2654         } else
2655                 resp->status = puterrno3(error);
2656 err1:
2657         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2658 out:
2659         DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2660             cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2661         if (vp != NULL)
2662                 VN_RELE(vp);
2663 
2664 }
2665 
2666 void *
2667 rfs3_rmdir_getfh(RMDIR3args *args)
2668 {
2669 
2670         return (&args->object.dir);
2671 }
2672 
2673 void
2674 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2675     struct svc_req *req, cred_t *cr, bool_t ro)
2676 {
2677         int error = 0;
2678         vnode_t *fvp;
2679         vnode_t *tvp;
2680         vnode_t *targvp;
2681         struct vattr *fbvap;
2682         struct vattr fbva;
2683         struct vattr *favap;
2684         struct vattr fava;
2685         struct vattr *tbvap;
2686         struct vattr tbva;
2687         struct vattr *tavap;
2688         struct vattr tava;
2689         nfs_fh3 *fh3;
2690         struct exportinfo *to_exi;
2691         vnode_t *srcvp = NULL;
2692         bslabel_t *clabel;
2693         struct sockaddr *ca;
2694         char *name = NULL;
2695         char *toname = NULL;
2696 
2697         fbvap = NULL;
2698         favap = NULL;
2699         tbvap = NULL;
2700         tavap = NULL;
2701         tvp = NULL;
2702 
2703         fvp = nfs3_fhtovp(&args->from.dir, exi);
2704 
2705         DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2706             cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2707 
2708         if (fvp == NULL) {
2709                 error = ESTALE;
2710                 goto err;
2711         }
2712 
2713         if (is_system_labeled()) {
2714                 clabel = req->rq_label;
2715                 ASSERT(clabel != NULL);
2716                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2717                     "got client label from request(1)", struct svc_req *, req);
2718 
2719                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2720                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2721                             exi)) {
2722                                 resp->status = NFS3ERR_ACCES;
2723                                 goto err1;
2724                         }
2725                 }
2726         }
2727 
2728         fbva.va_mask = AT_ALL;
2729         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2730         favap = fbvap;
2731 
2732         fh3 = &args->to.dir;
2733         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2734         if (to_exi == NULL) {
2735                 resp->status = NFS3ERR_ACCES;
2736                 goto err1;
2737         }
2738         exi_rele(to_exi);
2739 
2740         if (to_exi != exi) {
2741                 resp->status = NFS3ERR_XDEV;
2742                 goto err1;
2743         }
2744 
2745         tvp = nfs3_fhtovp(&args->to.dir, exi);
2746         if (tvp == NULL) {
2747                 error = ESTALE;
2748                 goto err;
2749         }
2750 
2751         tbva.va_mask = AT_ALL;
2752         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2753         tavap = tbvap;
2754 
2755         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2756                 resp->status = NFS3ERR_NOTDIR;
2757                 goto err1;
2758         }
2759 
2760         if (args->from.name == nfs3nametoolong ||
2761             args->to.name == nfs3nametoolong) {
2762                 resp->status = NFS3ERR_NAMETOOLONG;
2763                 goto err1;
2764         }
2765         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2766             args->to.name == NULL || *(args->to.name) == '\0') {
2767                 resp->status = NFS3ERR_ACCES;
2768                 goto err1;
2769         }
2770 
2771         if (rdonly(ro, tvp)) {
2772                 resp->status = NFS3ERR_ROFS;
2773                 goto err1;
2774         }
2775 
2776         if (is_system_labeled()) {
2777                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2778                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2779                             exi)) {
2780                                 resp->status = NFS3ERR_ACCES;
2781                                 goto err1;
2782                         }
2783                 }
2784         }
2785 
2786         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2787         name = nfscmd_convname(ca, exi, args->from.name,
2788             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2789 
2790         if (name == NULL) {
2791                 resp->status = NFS3ERR_INVAL;
2792                 goto err1;
2793         }
2794 
2795         toname = nfscmd_convname(ca, exi, args->to.name,
2796             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2797 
2798         if (toname == NULL) {
2799                 resp->status = NFS3ERR_INVAL;
2800                 goto err1;
2801         }
2802 
2803         /*
2804          * Check for a conflict with a non-blocking mandatory share
2805          * reservation or V4 delegations.
2806          */
2807         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2808             NULL, cr, NULL, NULL, NULL);
2809         if (error != 0)
2810                 goto err;
2811 
2812         /*
2813          * If we rename a delegated file we should recall the
2814          * delegation, since future opens should fail or would
2815          * refer to a new file.
2816          */
2817         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2818                 resp->status = NFS3ERR_JUKEBOX;
2819                 goto err1;
2820         }
2821 
2822         /*
2823          * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2824          * first to avoid VOP_LOOKUP if possible.
2825          */
2826         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2827             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2828             NULL, NULL, NULL) == 0) {
2829 
2830                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2831                         VN_RELE(targvp);
2832                         resp->status = NFS3ERR_JUKEBOX;
2833                         goto err1;
2834                 }
2835                 VN_RELE(targvp);
2836         }
2837 
2838         if (!nbl_need_check(srcvp)) {
2839                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2840         } else {
2841                 nbl_start_crit(srcvp, RW_READER);
2842                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2843                         error = EACCES;
2844                 else
2845                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2846                 nbl_end_crit(srcvp);
2847         }
2848         if (error == 0)
2849                 vn_renamepath(tvp, srcvp, args->to.name,
2850                     strlen(args->to.name));
2851         VN_RELE(srcvp);
2852         srcvp = NULL;
2853 
2854         fava.va_mask = AT_ALL;
2855         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2856         tava.va_mask = AT_ALL;
2857         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2858 
2859         /*
2860          * Force modified data and metadata out to stable storage.
2861          */
2862         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2863         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2864 
2865         if (error)
2866                 goto err;
2867 
2868         resp->status = NFS3_OK;
2869         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2870         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2871         goto out;
2872 
2873 err:
2874         if (curthread->t_flag & T_WOULDBLOCK) {
2875                 curthread->t_flag &= ~T_WOULDBLOCK;
2876                 resp->status = NFS3ERR_JUKEBOX;
2877         } else {
2878                 resp->status = puterrno3(error);
2879         }
2880 err1:
2881         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2882         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2883 
2884 out:
2885         if (name != NULL && name != args->from.name)
2886                 kmem_free(name, MAXPATHLEN + 1);
2887         if (toname != NULL && toname != args->to.name)
2888                 kmem_free(toname, MAXPATHLEN + 1);
2889 
2890         DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2891             cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2892         if (fvp != NULL)
2893                 VN_RELE(fvp);
2894         if (tvp != NULL)
2895                 VN_RELE(tvp);
2896 }
2897 
2898 void *
2899 rfs3_rename_getfh(RENAME3args *args)
2900 {
2901 
2902         return (&args->from.dir);
2903 }
2904 
2905 void
2906 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2907     struct svc_req *req, cred_t *cr, bool_t ro)
2908 {
2909         int error;
2910         vnode_t *vp;
2911         vnode_t *dvp;
2912         struct vattr *vap;
2913         struct vattr va;
2914         struct vattr *bvap;
2915         struct vattr bva;
2916         struct vattr *avap;
2917         struct vattr ava;
2918         nfs_fh3 *fh3;
2919         struct exportinfo *to_exi;
2920         bslabel_t *clabel;
2921         struct sockaddr *ca;
2922         char *name = NULL;
2923 
2924         vap = NULL;
2925         bvap = NULL;
2926         avap = NULL;
2927         dvp = NULL;
2928 
2929         vp = nfs3_fhtovp(&args->file, exi);
2930 
2931         DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2932             cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2933 
2934         if (vp == NULL) {
2935                 error = ESTALE;
2936                 goto out;
2937         }
2938 
2939         va.va_mask = AT_ALL;
2940         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2941 
2942         fh3 = &args->link.dir;
2943         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2944         if (to_exi == NULL) {
2945                 resp->status = NFS3ERR_ACCES;
2946                 goto out1;
2947         }
2948         exi_rele(to_exi);
2949 
2950         if (to_exi != exi) {
2951                 resp->status = NFS3ERR_XDEV;
2952                 goto out1;
2953         }
2954 
2955         if (is_system_labeled()) {
2956                 clabel = req->rq_label;
2957 
2958                 ASSERT(clabel != NULL);
2959                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2960                     "got client label from request(1)", struct svc_req *, req);
2961 
2962                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2963                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2964                             exi)) {
2965                                 resp->status = NFS3ERR_ACCES;
2966                                 goto out1;
2967                         }
2968                 }
2969         }
2970 
2971         dvp = nfs3_fhtovp(&args->link.dir, exi);
2972         if (dvp == NULL) {
2973                 error = ESTALE;
2974                 goto out;
2975         }
2976 
2977         bva.va_mask = AT_ALL;
2978         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2979 
2980         if (dvp->v_type != VDIR) {
2981                 resp->status = NFS3ERR_NOTDIR;
2982                 goto out1;
2983         }
2984 
2985         if (args->link.name == nfs3nametoolong) {
2986                 resp->status = NFS3ERR_NAMETOOLONG;
2987                 goto out1;
2988         }
2989 
2990         if (args->link.name == NULL || *(args->link.name) == '\0') {
2991                 resp->status = NFS3ERR_ACCES;
2992                 goto out1;
2993         }
2994 
2995         if (rdonly(ro, dvp)) {
2996                 resp->status = NFS3ERR_ROFS;
2997                 goto out1;
2998         }
2999 
3000         if (is_system_labeled()) {
3001                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3002                     "got client label from request(1)", struct svc_req *, req);
3003 
3004                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3005                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3006                             exi)) {
3007                                 resp->status = NFS3ERR_ACCES;
3008                                 goto out1;
3009                         }
3010                 }
3011         }
3012 
3013         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3014         name = nfscmd_convname(ca, exi, args->link.name,
3015             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3016 
3017         if (name == NULL) {
3018                 resp->status = NFS3ERR_SERVERFAULT;
3019                 goto out1;
3020         }
3021 
3022         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3023 
3024         va.va_mask = AT_ALL;
3025         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3026         ava.va_mask = AT_ALL;
3027         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3028 
3029         /*
3030          * Force modified data and metadata out to stable storage.
3031          */
3032         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3033         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3034 
3035         if (error)
3036                 goto out;
3037 
3038         VN_RELE(dvp);
3039 
3040         resp->status = NFS3_OK;
3041         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3042         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3043 
3044         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3045             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3046 
3047         VN_RELE(vp);
3048 
3049         return;
3050 
3051 out:
3052         if (curthread->t_flag & T_WOULDBLOCK) {
3053                 curthread->t_flag &= ~T_WOULDBLOCK;
3054                 resp->status = NFS3ERR_JUKEBOX;
3055         } else
3056                 resp->status = puterrno3(error);
3057 out1:
3058         if (name != NULL && name != args->link.name)
3059                 kmem_free(name, MAXPATHLEN + 1);
3060 
3061         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3062             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3063 
3064         if (vp != NULL)
3065                 VN_RELE(vp);
3066         if (dvp != NULL)
3067                 VN_RELE(dvp);
3068         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3069         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3070 }
3071 
3072 void *
3073 rfs3_link_getfh(LINK3args *args)
3074 {
3075 
3076         return (&args->file);
3077 }
3078 
3079 /*
3080  * This macro defines the size of a response which contains attribute
3081  * information and one directory entry (whose length is specified by
3082  * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
3086  * is not, then we need to check to make sure that this error does not
3087  * need to be returned.
3088  *
3089  * NFS3_READDIR_MIN_COUNT is comprised of following :
3090  *
3091  * status - 1 * BYTES_PER_XDR_UNIT
3092  * attr. flag - 1 * BYTES_PER_XDR_UNIT
3093  * cookie verifier - 2 * BYTES_PER_XDR_UNIT
3094  * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
3095  * boolean - 1 * BYTES_PER_XDR_UNIT
3096  * file id - 2 * BYTES_PER_XDR_UNIT
3097  * directory name length - 1 * BYTES_PER_XDR_UNIT
3098  * cookie - 2 * BYTES_PER_XDR_UNIT
3099  * end of list - 1 * BYTES_PER_XDR_UNIT
3100  * end of file - 1 * BYTES_PER_XDR_UNIT
3101  * Name length of directory to the nearest byte
3102  */
3103 
/* Name bytes rounded up to whole XDR units, plus 33 fixed XDR units. */
#define NFS3_READDIR_MIN_COUNT(length)  \
        (roundup((length), BYTES_PER_XDR_UNIT) + \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
            BYTES_PER_XDR_UNIT)
3107 
3108 /* ARGSUSED */
3109 void
3110 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3111     struct svc_req *req, cred_t *cr, bool_t ro)
3112 {
3113         int error;
3114         vnode_t *vp;
3115         struct vattr *vap;
3116         struct vattr va;
3117         struct iovec iov;
3118         struct uio uio;
3119         char *data;
3120         int iseof;
3121         int bufsize;
3122         int namlen;
3123         uint_t count;
3124         struct sockaddr *ca;
3125 
3126         vap = NULL;
3127 
3128         vp = nfs3_fhtovp(&args->dir, exi);
3129 
3130         DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3131             cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3132 
3133         if (vp == NULL) {
3134                 error = ESTALE;
3135                 goto out;
3136         }
3137 
3138         if (is_system_labeled()) {
3139                 bslabel_t *clabel = req->rq_label;
3140 
3141                 ASSERT(clabel != NULL);
3142                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3143                     "got client label from request(1)", struct svc_req *, req);
3144 
3145                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3146                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3147                             exi)) {
3148                                 resp->status = NFS3ERR_ACCES;
3149                                 goto out1;
3150                         }
3151                 }
3152         }
3153 
3154         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3155 
3156         va.va_mask = AT_ALL;
3157         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3158 
3159         if (vp->v_type != VDIR) {
3160                 resp->status = NFS3ERR_NOTDIR;
3161                 goto out1;
3162         }
3163 
3164         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3165         if (error)
3166                 goto out;
3167 
3168         /*
3169          * Now don't allow arbitrary count to alloc;
3170          * allow the maximum not to exceed rfs3_tsize()
3171          */
3172         if (args->count > rfs3_tsize(req))
3173                 args->count = rfs3_tsize(req);
3174 
3175         /*
3176          * Make sure that there is room to read at least one entry
3177          * if any are available.
3178          */
3179         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3180                 count = DIRENT64_RECLEN(MAXNAMELEN);
3181         else
3182                 count = args->count;
3183 
3184         data = kmem_alloc(count, KM_SLEEP);
3185 
3186         iov.iov_base = data;
3187         iov.iov_len = count;
3188         uio.uio_iov = &iov;
3189         uio.uio_iovcnt = 1;
3190         uio.uio_segflg = UIO_SYSSPACE;
3191         uio.uio_extflg = UIO_COPY_CACHED;
3192         uio.uio_loffset = (offset_t)args->cookie;
3193         uio.uio_resid = count;
3194 
3195         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3196 
3197         va.va_mask = AT_ALL;
3198         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3199 
3200         if (error) {
3201                 kmem_free(data, count);
3202                 goto out;
3203         }
3204 
3205         /*
3206          * If the count was not large enough to be able to guarantee
3207          * to be able to return at least one entry, then need to
3208          * check to see if NFS3ERR_TOOSMALL should be returned.
3209          */
3210         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3211                 /*
3212                  * bufsize is used to keep track of the size of the response.
3213                  * It is primed with:
3214                  *      1 for the status +
3215                  *      1 for the dir_attributes.attributes boolean +
3216                  *      2 for the cookie verifier
3217                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3218                  * to bytes.  If there are directory attributes to be
3219                  * returned, then:
3220                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3221                  * time BYTES_PER_XDR_UNIT is added to account for them.
3222                  */
3223                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3224                 if (vap != NULL)
3225                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3226                 /*
3227                  * An entry is composed of:
3228                  *      1 for the true/false list indicator +
3229                  *      2 for the fileid +
3230                  *      1 for the length of the name +
3231                  *      2 for the cookie +
3232                  * all times BYTES_PER_XDR_UNIT to convert from
3233                  * XDR units to bytes, plus the length of the name
3234                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3235                  */
3236                 if (count != uio.uio_resid) {
3237                         namlen = strlen(((struct dirent64 *)data)->d_name);
3238                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3239                             roundup(namlen, BYTES_PER_XDR_UNIT);
3240                 }
3241                 /*
3242                  * We need to check to see if the number of bytes left
3243                  * to go into the buffer will actually fit into the
3244                  * buffer.  This is calculated as the size of this
3245                  * entry plus:
3246                  *      1 for the true/false list indicator +
3247                  *      1 for the eof indicator
3248                  * times BYTES_PER_XDR_UNIT to convert from from
3249                  * XDR units to bytes.
3250                  */
3251                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3252                 if (bufsize > args->count) {
3253                         kmem_free(data, count);
3254                         resp->status = NFS3ERR_TOOSMALL;
3255                         goto out1;
3256                 }
3257         }
3258 
3259         /*
3260          * Have a valid readir buffer for the native character
3261          * set. Need to check if a conversion is necessary and
3262          * potentially rewrite the whole buffer. Note that if the
3263          * conversion expands names enough, the structure may not
3264          * fit. In this case, we need to drop entries until if fits
3265          * and patch the counts in order that the next readdir will
3266          * get the correct entries.
3267          */
3268         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3269         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3270 
3271 
3272         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3273 
3274 #if 0 /* notyet */
3275         /*
3276          * Don't do this.  It causes local disk writes when just
3277          * reading the file and the overhead is deemed larger
3278          * than the benefit.
3279          */
3280         /*
3281          * Force modified metadata out to stable storage.
3282          */
3283         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3284 #endif
3285 
3286         resp->status = NFS3_OK;
3287         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3288         resp->resok.cookieverf = 0;
3289         resp->resok.reply.entries = (entry3 *)data;
3290         resp->resok.reply.eof = iseof;
3291         resp->resok.size = count - uio.uio_resid;
3292         resp->resok.count = args->count;
3293         resp->resok.freecount = count;
3294 
3295         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3296             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3297 
3298         VN_RELE(vp);
3299 
3300         return;
3301 
3302 out:
3303         if (curthread->t_flag & T_WOULDBLOCK) {
3304                 curthread->t_flag &= ~T_WOULDBLOCK;
3305                 resp->status = NFS3ERR_JUKEBOX;
3306         } else
3307                 resp->status = puterrno3(error);
3308 out1:
3309         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3310             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3311 
3312         if (vp != NULL) {
3313                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3314                 VN_RELE(vp);
3315         }
3316         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3317 }
3318 
3319 void *
3320 rfs3_readdir_getfh(READDIR3args *args)
3321 {
3322 
3323         return (&args->dir);
3324 }
3325 
3326 void
3327 rfs3_readdir_free(READDIR3res *resp)
3328 {
3329 
3330         if (resp->status == NFS3_OK)
3331                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3332 }
3333 
/*
 * Step from one dirent64 record to the one that follows it in the same
 * buffer, using the record's own d_reclen as the byte stride.  Any
 * earlier definition of nextdp is discarded first (#undef of an
 * undefined name is a no-op).
 */
#undef nextdp
#define nextdp(dp)      \
        ((struct dirent64 *)(&((char *)(dp))[(dp)->d_reclen]))
3338 
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry rounded up to a whole XDR unit
 *
 * The argument is parenthesized so that any expression may be passed,
 * matching NFS3_READDIR_MIN_COUNT().
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + \
        NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3362 
/*
 * Initial size, in bytes, of each VOP_READDIR() request issued by
 * rfs3_readdirplus(); that function clamps it to the space left in its
 * buffer before every read.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3364 
3365 /* ARGSUSED */
3366 void
3367 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3368     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3369 {
3370         int error;
3371         vnode_t *vp;
3372         struct vattr *vap;
3373         struct vattr va;
3374         struct iovec iov;
3375         struct uio uio;
3376         char *data;
3377         int iseof;
3378         struct dirent64 *dp;
3379         vnode_t *nvp;
3380         struct vattr *nvap;
3381         struct vattr nva;
3382         entryplus3_info *infop = NULL;
3383         int size = 0;
3384         int nents = 0;
3385         int bufsize = 0;
3386         int entrysize = 0;
3387         int tofit = 0;
3388         int rd_unit = rfs3_readdir_unit;
3389         int prev_len;
3390         int space_left;
3391         int i;
3392         uint_t *namlen = NULL;
3393         char *ndata = NULL;
3394         struct sockaddr *ca;
3395         size_t ret;
3396 
3397         vap = NULL;
3398 
3399         vp = nfs3_fhtovp(&args->dir, exi);
3400 
3401         DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3402             cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3403 
3404         if (vp == NULL) {
3405                 error = ESTALE;
3406                 goto out;
3407         }
3408 
3409         if (is_system_labeled()) {
3410                 bslabel_t *clabel = req->rq_label;
3411 
3412                 ASSERT(clabel != NULL);
3413                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3414                     char *, "got client label from request(1)",
3415                     struct svc_req *, req);
3416 
3417                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3418                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3419                             exi)) {
3420                                 resp->status = NFS3ERR_ACCES;
3421                                 goto out1;
3422                         }
3423                 }
3424         }
3425 
3426         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3427 
3428         va.va_mask = AT_ALL;
3429         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3430 
3431         if (vp->v_type != VDIR) {
3432                 error = ENOTDIR;
3433                 goto out;
3434         }
3435 
3436         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3437         if (error)
3438                 goto out;
3439 
3440         /*
3441          * Don't allow arbitrary counts for allocation
3442          */
3443         if (args->maxcount > rfs3_tsize(req))
3444                 args->maxcount = rfs3_tsize(req);
3445 
3446         /*
3447          * Make sure that there is room to read at least one entry
3448          * if any are available
3449          */
3450         args->dircount = MIN(args->dircount, args->maxcount);
3451 
3452         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3453                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3454 
3455         /*
3456          * This allocation relies on a minimum directory entry
3457          * being roughly 24 bytes.  Therefore, the namlen array
3458          * will have enough space based on the maximum number of
3459          * entries to read.
3460          */
3461         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3462 
3463         space_left = args->dircount;
3464         data = kmem_alloc(args->dircount, KM_SLEEP);
3465         dp = (struct dirent64 *)data;
3466         uio.uio_iov = &iov;
3467         uio.uio_iovcnt = 1;
3468         uio.uio_segflg = UIO_SYSSPACE;
3469         uio.uio_extflg = UIO_COPY_CACHED;
3470         uio.uio_loffset = (offset_t)args->cookie;
3471 
3472         /*
3473          * bufsize is used to keep track of the size of the response as we
3474          * get post op attributes and filehandles for each entry.  This is
3475          * an optimization as the server may have read more entries than will
3476          * fit in the buffer specified by maxcount.  We stop calculating
3477          * post op attributes and filehandles once we have exceeded maxcount.
3478          * This will minimize the effect of truncation.
3479          *
3480          * It is primed with:
3481          *      1 for the status +
3482          *      1 for the dir_attributes.attributes boolean +
3483          *      2 for the cookie verifier
3484          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3485          * to bytes.  If there are directory attributes to be
3486          * returned, then:
3487          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3488          * time BYTES_PER_XDR_UNIT is added to account for them.
3489          */
3490         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3491         if (vap != NULL)
3492                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3493 
3494 getmoredents:
3495         /*
3496          * Here we make a check so that our read unit is not larger than
3497          * the space left in the buffer.
3498          */
3499         rd_unit = MIN(rd_unit, space_left);
3500         iov.iov_base = (char *)dp;
3501         iov.iov_len = rd_unit;
3502         uio.uio_resid = rd_unit;
3503         prev_len = rd_unit;
3504 
3505         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3506 
3507         if (error) {
3508                 kmem_free(data, args->dircount);
3509                 goto out;
3510         }
3511 
3512         if (uio.uio_resid == prev_len && !iseof) {
3513                 if (nents == 0) {
3514                         kmem_free(data, args->dircount);
3515                         resp->status = NFS3ERR_TOOSMALL;
3516                         goto out1;
3517                 }
3518 
3519                 /*
3520                  * We could not get any more entries, so get the attributes
3521                  * and filehandle for the entries already obtained.
3522                  */
3523                 goto good;
3524         }
3525 
3526         /*
3527          * We estimate the size of the response by assuming the
3528          * entry exists and attributes and filehandle are also valid
3529          */
3530         for (size = prev_len - uio.uio_resid;
3531             size > 0;
3532             size -= dp->d_reclen, dp = nextdp(dp)) {
3533 
3534                 if (dp->d_ino == 0) {
3535                         nents++;
3536                         continue;
3537                 }
3538 
3539                 namlen[nents] = strlen(dp->d_name);
3540                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3541 
3542                 /*
3543                  * We need to check to see if the number of bytes left
3544                  * to go into the buffer will actually fit into the
3545                  * buffer.  This is calculated as the size of this
3546                  * entry plus:
3547                  *      1 for the true/false list indicator +
3548                  *      1 for the eof indicator
3549                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3550                  * to bytes.
3551                  *
3552                  * Also check the dircount limit against the first entry read
3553                  *
3554                  */
3555                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3556                 if (bufsize + tofit > args->maxcount) {
3557                         /*
3558                          * We make a check here to see if this was the
3559                          * first entry being measured.  If so, then maxcount
3560                          * was too small to begin with and so we need to
3561                          * return with NFS3ERR_TOOSMALL.
3562                          */
3563                         if (nents == 0) {
3564                                 kmem_free(data, args->dircount);
3565                                 resp->status = NFS3ERR_TOOSMALL;
3566                                 goto out1;
3567                         }
3568                         iseof = FALSE;
3569                         goto good;
3570                 }
3571                 bufsize += entrysize;
3572                 nents++;
3573         }
3574 
3575         /*
3576          * If there is enough room to fit at least 1 more entry including
3577          * post op attributes and filehandle in the buffer AND that we haven't
3578          * exceeded dircount then go back and get some more.
3579          */
3580         if (!iseof &&
3581             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3582                 space_left -= (prev_len - uio.uio_resid);
3583                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3584                         goto getmoredents;
3585 
3586                 /* else, fall through */
3587         }
3588 good:
3589         va.va_mask = AT_ALL;
3590         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3591 
3592         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3593 
3594         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3595         resp->resok.infop = infop;
3596 
3597         dp = (struct dirent64 *)data;
3598         for (i = 0; i < nents; i++) {
3599 
3600                 if (dp->d_ino == 0) {
3601                         infop[i].attr.attributes = FALSE;
3602                         infop[i].fh.handle_follows = FALSE;
3603                         dp = nextdp(dp);
3604                         continue;
3605                 }
3606 
3607                 infop[i].namelen = namlen[i];
3608 
3609                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3610                     NULL, NULL, NULL);
3611                 if (error) {
3612                         infop[i].attr.attributes = FALSE;
3613                         infop[i].fh.handle_follows = FALSE;
3614                         dp = nextdp(dp);
3615                         continue;
3616                 }
3617 
3618                 nva.va_mask = AT_ALL;
3619                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3620 
3621                 /* Lie about the object type for a referral */
3622                 if (vn_is_nfs_reparse(nvp, cr))
3623                         nvap->va_type = VLNK;
3624 
3625                 if (vn_ismntpt(nvp)) {
3626                         infop[i].attr.attributes = FALSE;
3627                         infop[i].fh.handle_follows = FALSE;
3628                 } else {
3629                         vattr_to_post_op_attr(nvap, &infop[i].attr);
3630 
3631                         error = makefh3(&infop[i].fh.handle, nvp, exi);
3632                         if (!error)
3633                                 infop[i].fh.handle_follows = TRUE;
3634                         else
3635                                 infop[i].fh.handle_follows = FALSE;
3636                 }
3637 
3638                 VN_RELE(nvp);
3639                 dp = nextdp(dp);
3640         }
3641 
3642         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3643         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3644         if (ndata == NULL)
3645                 ndata = data;
3646 
3647         if (ret > 0) {
3648                 /*
3649                  * We had to drop one or more entries in order to fit
3650                  * during the character conversion.  We need to patch
3651                  * up the size and eof info.
3652                  */
3653                 if (iseof)
3654                         iseof = FALSE;
3655 
3656                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3657                     nents, ret);
3658         }
3659 
3660 
3661 #if 0 /* notyet */
3662         /*
3663          * Don't do this.  It causes local disk writes when just
3664          * reading the file and the overhead is deemed larger
3665          * than the benefit.
3666          */
3667         /*
3668          * Force modified metadata out to stable storage.
3669          */
3670         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3671 #endif
3672 
3673         kmem_free(namlen, args->dircount);
3674 
3675         resp->status = NFS3_OK;
3676         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3677         resp->resok.cookieverf = 0;
3678         resp->resok.reply.entries = (entryplus3 *)ndata;
3679         resp->resok.reply.eof = iseof;
3680         resp->resok.size = nents;
3681         resp->resok.count = args->dircount - ret;
3682         resp->resok.maxcount = args->maxcount;
3683 
3684         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3685             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3686         if (ndata != data)
3687                 kmem_free(data, args->dircount);
3688 
3689 
3690         VN_RELE(vp);
3691 
3692         return;
3693 
3694 out:
3695         if (curthread->t_flag & T_WOULDBLOCK) {
3696                 curthread->t_flag &= ~T_WOULDBLOCK;
3697                 resp->status = NFS3ERR_JUKEBOX;
3698         } else {
3699                 resp->status = puterrno3(error);
3700         }
3701 out1:
3702         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3703             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3704 
3705         if (vp != NULL) {
3706                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3707                 VN_RELE(vp);
3708         }
3709 
3710         if (namlen != NULL)
3711                 kmem_free(namlen, args->dircount);
3712 
3713         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3714 }
3715 
3716 void *
3717 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3718 {
3719 
3720         return (&args->dir);
3721 }
3722 
3723 void
3724 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3725 {
3726 
3727         if (resp->status == NFS3_OK) {
3728                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3729                 kmem_free(resp->resok.infop,
3730                     resp->resok.size * sizeof (struct entryplus3_info));
3731         }
3732 }
3733 
3734 /* ARGSUSED */
3735 void
3736 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3737     struct svc_req *req, cred_t *cr, bool_t ro)
3738 {
3739         int error;
3740         vnode_t *vp;
3741         struct vattr *vap;
3742         struct vattr va;
3743         struct statvfs64 sb;
3744 
3745         vap = NULL;
3746 
3747         vp = nfs3_fhtovp(&args->fsroot, exi);
3748 
3749         DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3750             cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3751 
3752         if (vp == NULL) {
3753                 error = ESTALE;
3754                 goto out;
3755         }
3756 
3757         if (is_system_labeled()) {
3758                 bslabel_t *clabel = req->rq_label;
3759 
3760                 ASSERT(clabel != NULL);
3761                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3762                     "got client label from request(1)", struct svc_req *, req);
3763 
3764                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3765                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3766                             exi)) {
3767                                 resp->status = NFS3ERR_ACCES;
3768                                 goto out1;
3769                         }
3770                 }
3771         }
3772 
3773         error = VFS_STATVFS(vp->v_vfsp, &sb);
3774 
3775         va.va_mask = AT_ALL;
3776         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3777 
3778         if (error)
3779                 goto out;
3780 
3781         resp->status = NFS3_OK;
3782         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3783         if (sb.f_blocks != (fsblkcnt64_t)-1)
3784                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3785         else
3786                 resp->resok.tbytes = (size3)sb.f_blocks;
3787         if (sb.f_bfree != (fsblkcnt64_t)-1)
3788                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3789         else
3790                 resp->resok.fbytes = (size3)sb.f_bfree;
3791         if (sb.f_bavail != (fsblkcnt64_t)-1)
3792                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3793         else
3794                 resp->resok.abytes = (size3)sb.f_bavail;
3795         resp->resok.tfiles = (size3)sb.f_files;
3796         resp->resok.ffiles = (size3)sb.f_ffree;
3797         resp->resok.afiles = (size3)sb.f_favail;
3798         resp->resok.invarsec = 0;
3799 
3800         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3801             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3802         VN_RELE(vp);
3803 
3804         return;
3805 
3806 out:
3807         if (curthread->t_flag & T_WOULDBLOCK) {
3808                 curthread->t_flag &= ~T_WOULDBLOCK;
3809                 resp->status = NFS3ERR_JUKEBOX;
3810         } else
3811                 resp->status = puterrno3(error);
3812 out1:
3813         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3814             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3815 
3816         if (vp != NULL)
3817                 VN_RELE(vp);
3818         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3819 }
3820 
3821 void *
3822 rfs3_fsstat_getfh(FSSTAT3args *args)
3823 {
3824 
3825         return (&args->fsroot);
3826 }
3827 
3828 /* ARGSUSED */
3829 void
3830 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3831     struct svc_req *req, cred_t *cr, bool_t ro)
3832 {
3833         vnode_t *vp;
3834         struct vattr *vap;
3835         struct vattr va;
3836         uint32_t xfer_size;
3837         ulong_t l = 0;
3838         int error;
3839 
3840         vp = nfs3_fhtovp(&args->fsroot, exi);
3841 
3842         DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3843             cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3844 
3845         if (vp == NULL) {
3846                 if (curthread->t_flag & T_WOULDBLOCK) {
3847                         curthread->t_flag &= ~T_WOULDBLOCK;
3848                         resp->status = NFS3ERR_JUKEBOX;
3849                 } else
3850                         resp->status = NFS3ERR_STALE;
3851                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3852                 goto out;
3853         }
3854 
3855         if (is_system_labeled()) {
3856                 bslabel_t *clabel = req->rq_label;
3857 
3858                 ASSERT(clabel != NULL);
3859                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3860                     "got client label from request(1)", struct svc_req *, req);
3861 
3862                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3863                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3864                             exi)) {
3865                                 resp->status = NFS3ERR_STALE;
3866                                 vattr_to_post_op_attr(NULL,
3867                                     &resp->resfail.obj_attributes);
3868                                 goto out;
3869                         }
3870                 }
3871         }
3872 
3873         va.va_mask = AT_ALL;
3874         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3875 
3876         resp->status = NFS3_OK;
3877         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3878         xfer_size = rfs3_tsize(req);
3879         resp->resok.rtmax = xfer_size;
3880         resp->resok.rtpref = xfer_size;
3881         resp->resok.rtmult = DEV_BSIZE;
3882         resp->resok.wtmax = xfer_size;
3883         resp->resok.wtpref = xfer_size;
3884         resp->resok.wtmult = DEV_BSIZE;
3885         resp->resok.dtpref = MAXBSIZE;
3886 
3887         /*
3888          * Large file spec: want maxfilesize based on limit of
3889          * underlying filesystem.  We can guess 2^31-1 if need be.
3890          */
3891         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3892         if (error) {
3893                 resp->status = puterrno3(error);
3894                 goto out;
3895         }
3896 
3897         /*
3898          * If the underlying file system does not support _PC_FILESIZEBITS,
3899          * return a reasonable default. Note that error code on VOP_PATHCONF
3900          * will be 0, even if the underlying file system does not support
3901          * _PC_FILESIZEBITS.
3902          */
3903         if (l == (ulong_t)-1) {
3904                 resp->resok.maxfilesize = MAXOFF32_T;
3905         } else {
3906                 if (l >= (sizeof (uint64_t) * 8))
3907                         resp->resok.maxfilesize = INT64_MAX;
3908                 else
3909                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3910         }
3911 
3912         resp->resok.time_delta.seconds = 0;
3913         resp->resok.time_delta.nseconds = 1000;
3914         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3915             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3916 
3917         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3918             cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3919 
3920         VN_RELE(vp);
3921 
3922         return;
3923 
3924 out:
3925         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3926             cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3927         if (vp != NULL)
3928                 VN_RELE(vp);
3929 }
3930 
3931 void *
3932 rfs3_fsinfo_getfh(FSINFO3args *args)
3933 {
3934         return (&args->fsroot);
3935 }
3936 
3937 /* ARGSUSED */
3938 void
3939 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3940     struct svc_req *req, cred_t *cr, bool_t ro)
3941 {
3942         int error;
3943         vnode_t *vp;
3944         struct vattr *vap;
3945         struct vattr va;
3946         ulong_t val;
3947 
3948         vap = NULL;
3949 
3950         vp = nfs3_fhtovp(&args->object, exi);
3951 
3952         DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3953             cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3954 
3955         if (vp == NULL) {
3956                 error = ESTALE;
3957                 goto out;
3958         }
3959 
3960         if (is_system_labeled()) {
3961                 bslabel_t *clabel = req->rq_label;
3962 
3963                 ASSERT(clabel != NULL);
3964                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3965                     "got client label from request(1)", struct svc_req *, req);
3966 
3967                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3968                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3969                             exi)) {
3970                                 resp->status = NFS3ERR_ACCES;
3971                                 goto out1;
3972                         }
3973                 }
3974         }
3975 
3976         va.va_mask = AT_ALL;
3977         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3978 
3979         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3980         if (error)
3981                 goto out;
3982         resp->resok.info.link_max = (uint32)val;
3983 
3984         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3985         if (error)
3986                 goto out;
3987         resp->resok.info.name_max = (uint32)val;
3988 
3989         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3990         if (error)
3991                 goto out;
3992         if (val == 1)
3993                 resp->resok.info.no_trunc = TRUE;
3994         else
3995                 resp->resok.info.no_trunc = FALSE;
3996 
3997         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3998         if (error)
3999                 goto out;
4000         if (val == 1)
4001                 resp->resok.info.chown_restricted = TRUE;
4002         else
4003                 resp->resok.info.chown_restricted = FALSE;
4004 
4005         resp->status = NFS3_OK;
4006         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4007         resp->resok.info.case_insensitive = FALSE;
4008         resp->resok.info.case_preserving = TRUE;
4009         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4010             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4011         VN_RELE(vp);
4012         return;
4013 
4014 out:
4015         if (curthread->t_flag & T_WOULDBLOCK) {
4016                 curthread->t_flag &= ~T_WOULDBLOCK;
4017                 resp->status = NFS3ERR_JUKEBOX;
4018         } else
4019                 resp->status = puterrno3(error);
4020 out1:
4021         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4022             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4023         if (vp != NULL)
4024                 VN_RELE(vp);
4025         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4026 }
4027 
4028 void *
4029 rfs3_pathconf_getfh(PATHCONF3args *args)
4030 {
4031 
4032         return (&args->object);
4033 }
4034 
4035 void
4036 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4037     struct svc_req *req, cred_t *cr, bool_t ro)
4038 {
4039         int error;
4040         vnode_t *vp;
4041         struct vattr *bvap;
4042         struct vattr bva;
4043         struct vattr *avap;
4044         struct vattr ava;
4045 
4046         bvap = NULL;
4047         avap = NULL;
4048 
4049         vp = nfs3_fhtovp(&args->file, exi);
4050 
4051         DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4052             cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4053 
4054         if (vp == NULL) {
4055                 error = ESTALE;
4056                 goto out;
4057         }
4058 
4059         bva.va_mask = AT_ALL;
4060         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4061 
4062         /*
4063          * If we can't get the attributes, then we can't do the
4064          * right access checking.  So, we'll fail the request.
4065          */
4066         if (error)
4067                 goto out;
4068 
4069         bvap = &bva;
4070 
4071         if (rdonly(ro, vp)) {
4072                 resp->status = NFS3ERR_ROFS;
4073                 goto out1;
4074         }
4075 
4076         if (vp->v_type != VREG) {
4077                 resp->status = NFS3ERR_INVAL;
4078                 goto out1;
4079         }
4080 
4081         if (is_system_labeled()) {
4082                 bslabel_t *clabel = req->rq_label;
4083 
4084                 ASSERT(clabel != NULL);
4085                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4086                     "got client label from request(1)", struct svc_req *, req);
4087 
4088                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4089                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4090                             exi)) {
4091                                 resp->status = NFS3ERR_ACCES;
4092                                 goto out1;
4093                         }
4094                 }
4095         }
4096 
4097         if (crgetuid(cr) != bva.va_uid &&
4098             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4099                 goto out;
4100 
4101         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4102 
4103         ava.va_mask = AT_ALL;
4104         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4105 
4106         if (error)
4107                 goto out;
4108 
4109         resp->status = NFS3_OK;
4110         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4111         resp->resok.verf = write3verf;
4112 
4113         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4114             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4115 
4116         VN_RELE(vp);
4117 
4118         return;
4119 
4120 out:
4121         if (curthread->t_flag & T_WOULDBLOCK) {
4122                 curthread->t_flag &= ~T_WOULDBLOCK;
4123                 resp->status = NFS3ERR_JUKEBOX;
4124         } else
4125                 resp->status = puterrno3(error);
4126 out1:
4127         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4128             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4129 
4130         if (vp != NULL)
4131                 VN_RELE(vp);
4132         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4133 }
4134 
4135 void *
4136 rfs3_commit_getfh(COMMIT3args *args)
4137 {
4138 
4139         return (&args->file);
4140 }
4141 
4142 static int
4143 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4144 {
4145 
4146         vap->va_mask = 0;
4147 
4148         if (sap->mode.set_it) {
4149                 vap->va_mode = (mode_t)sap->mode.mode;
4150                 vap->va_mask |= AT_MODE;
4151         }
4152         if (sap->uid.set_it) {
4153                 vap->va_uid = (uid_t)sap->uid.uid;
4154                 vap->va_mask |= AT_UID;
4155         }
4156         if (sap->gid.set_it) {
4157                 vap->va_gid = (gid_t)sap->gid.gid;
4158                 vap->va_mask |= AT_GID;
4159         }
4160         if (sap->size.set_it) {
4161                 if (sap->size.size > (size3)((u_longlong_t)-1))
4162                         return (EINVAL);
4163                 vap->va_size = sap->size.size;
4164                 vap->va_mask |= AT_SIZE;
4165         }
4166         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4167 #ifndef _LP64
4168                 /* check time validity */
4169                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4170                         return (EOVERFLOW);
4171 #endif
4172                 /*
4173                  * nfs protocol defines times as unsigned so don't extend sign,
4174                  * unless sysadmin set nfs_allow_preepoch_time.
4175                  */
4176                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4177                     sap->atime.atime.seconds);
4178                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4179                 vap->va_mask |= AT_ATIME;
4180         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4181                 gethrestime(&vap->va_atime);
4182                 vap->va_mask |= AT_ATIME;
4183         }
4184         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4185 #ifndef _LP64
4186                 /* check time validity */
4187                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4188                         return (EOVERFLOW);
4189 #endif
4190                 /*
4191                  * nfs protocol defines times as unsigned so don't extend sign,
4192                  * unless sysadmin set nfs_allow_preepoch_time.
4193                  */
4194                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4195                     sap->mtime.mtime.seconds);
4196                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4197                 vap->va_mask |= AT_MTIME;
4198         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4199                 gethrestime(&vap->va_mtime);
4200                 vap->va_mask |= AT_MTIME;
4201         }
4202 
4203         return (0);
4204 }
4205 
4206 static ftype3 vt_to_nf3[] = {
4207         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4208 };
4209 
4210 static int
4211 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4212 {
4213 
4214         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4215         /* Return error if time or size overflow */
4216         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4217                 return (EOVERFLOW);
4218         }
4219         fap->type = vt_to_nf3[vap->va_type];
4220         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4221         fap->nlink = (uint32)vap->va_nlink;
4222         if (vap->va_uid == UID_NOBODY)
4223                 fap->uid = (uid3)NFS_UID_NOBODY;
4224         else
4225                 fap->uid = (uid3)vap->va_uid;
4226         if (vap->va_gid == GID_NOBODY)
4227                 fap->gid = (gid3)NFS_GID_NOBODY;
4228         else
4229                 fap->gid = (gid3)vap->va_gid;
4230         fap->size = (size3)vap->va_size;
4231         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4232         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4233         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4234         fap->fsid = (uint64)vap->va_fsid;
4235         fap->fileid = (fileid3)vap->va_nodeid;
4236         fap->atime.seconds = vap->va_atime.tv_sec;
4237         fap->atime.nseconds = vap->va_atime.tv_nsec;
4238         fap->mtime.seconds = vap->va_mtime.tv_sec;
4239         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4240         fap->ctime.seconds = vap->va_ctime.tv_sec;
4241         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4242         return (0);
4243 }
4244 
4245 static int
4246 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4247 {
4248 
4249         /* Return error if time or size overflow */
4250         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4251             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4252             NFS3_SIZE_OK(vap->va_size))) {
4253                 return (EOVERFLOW);
4254         }
4255         wccap->size = (size3)vap->va_size;
4256         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4257         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4258         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4259         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4260         return (0);
4261 }
4262 
4263 static void
4264 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4265 {
4266 
4267         /* don't return attrs if time overflow */
4268         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4269                 poap->attributes = TRUE;
4270         } else
4271                 poap->attributes = FALSE;
4272 }
4273 
4274 void
4275 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4276 {
4277 
4278         /* don't return attrs if time overflow */
4279         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4280                 poap->attributes = TRUE;
4281         } else
4282                 poap->attributes = FALSE;
4283 }
4284 
4285 static void
4286 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4287 {
4288 
4289         vattr_to_pre_op_attr(bvap, &wccp->before);
4290         vattr_to_post_op_attr(avap, &wccp->after);
4291 }
4292 
4293 void
4294 rfs3_srvrinit(void)
4295 {
4296         struct rfs3_verf_overlay {
4297                 uint_t id; /* a "unique" identifier */
4298                 int ts; /* a unique timestamp */
4299         } *verfp;
4300         timestruc_t now;
4301 
4302         /*
4303          * The following algorithm attempts to find a unique verifier
4304          * to be used as the write verifier returned from the server
4305          * to the client.  It is important that this verifier change
4306          * whenever the server reboots.  Of secondary importance, it
4307          * is important for the verifier to be unique between two
4308          * different servers.
4309          *
4310          * Thus, an attempt is made to use the system hostid and the
4311          * current time in seconds when the nfssrv kernel module is
4312          * loaded.  It is assumed that an NFS server will not be able
4313          * to boot and then to reboot in less than a second.  If the
4314          * hostid has not been set, then the current high resolution
4315          * time is used.  This will ensure different verifiers each
4316          * time the server reboots and minimize the chances that two
4317          * different servers will have the same verifier.
4318          */
4319 
4320 #ifndef lint
4321         /*
4322          * We ASSERT that this constant logic expression is
4323          * always true because in the past, it wasn't.
4324          */
4325         ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4326 #endif
4327 
4328         gethrestime(&now);
4329         verfp = (struct rfs3_verf_overlay *)&write3verf;
4330         verfp->ts = (int)now.tv_sec;
4331         verfp->id = zone_get_hostid(NULL);
4332 
4333         if (verfp->id == 0)
4334                 verfp->id = (uint_t)now.tv_nsec;
4335 
4336         nfs3_srv_caller_id = fs_new_caller_id();
4337 
4338 }
4339 
4340 static int
4341 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4342 {
4343         struct clist    *wcl;
4344         int             wlist_len;
4345         count3          count = rok->count;
4346 
4347         wcl = args->wlist;
4348         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4349                 return (FALSE);
4350         }
4351 
4352         wcl = args->wlist;
4353         rok->wlist_len = wlist_len;
4354         rok->wlist = wcl;
4355         return (TRUE);
4356 }
4357 
/*
 * Teardown counterpart to rfs3_srvrinit().  The body is intentionally
 * empty: this routine releases nothing.
 */
void
rfs3_srvrfini(void)
{
        /* Nothing to do */
}