1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2018 Nexenta Systems, Inc.
  24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 
  28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /* All Rights Reserved */
  30 
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/buf.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/uio.h>
  40 #include <sys/errno.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/statvfs.h>
  43 #include <sys/kmem.h>
  44 #include <sys/dirent.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/systeminfo.h>
  48 #include <sys/flock.h>
  49 #include <sys/nbmlock.h>
  50 #include <sys/policy.h>
  51 #include <sys/sdt.h>
  52 
  53 #include <rpc/types.h>
  54 #include <rpc/auth.h>
  55 #include <rpc/svc.h>
  56 #include <rpc/rpc_rdma.h>
  57 
  58 #include <nfs/nfs.h>
  59 #include <nfs/export.h>
  60 #include <nfs/nfs_cmd.h>
  61 
  62 #include <sys/strsubr.h>
  63 #include <sys/tsol/label.h>
  64 #include <sys/tsol/tndb.h>
  65 
  66 #include <sys/zone.h>
  67 
  68 #include <inet/ip.h>
  69 #include <inet/ip6.h>
  70 
  71 /*
  72  * Zone global variables of NFSv3 server
  73  */
  74 typedef struct nfs3_srv {
  75         writeverf3      write3verf;
  76 } nfs3_srv_t;
  77 
  78 /*
  79  * These are the interface routines for the server side of the
  80  * Network File System.  See the NFS version 3 protocol specification
  81  * for a description of this interface.
  82  */
  83 
  84 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  85 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  86 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  87 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  88 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  89 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  90 
  91 extern int nfs_loaned_buffers;
  92 
  93 u_longlong_t nfs3_srv_caller_id;
  94 static zone_key_t rfs3_zone_key;
  95 
  96 /* ARGSUSED */
  97 void
  98 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  99     struct svc_req *req, cred_t *cr, bool_t ro)
 100 {
 101         int error;
 102         vnode_t *vp;
 103         struct vattr va;
 104 
 105         vp = nfs3_fhtovp(&args->object, exi);
 106 
 107         DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
 108             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 109             GETATTR3args *, args);
 110 
 111         if (vp == NULL) {
 112                 error = ESTALE;
 113                 goto out;
 114         }
 115 
 116         va.va_mask = AT_ALL;
 117         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 118 
 119         if (!error) {
 120                 /* Lie about the object type for a referral */
 121                 if (vn_is_nfs_reparse(vp, cr))
 122                         va.va_type = VLNK;
 123 
 124                 /* overflow error if time or size is out of range */
 125                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 126                 if (error)
 127                         goto out;
 128                 resp->status = NFS3_OK;
 129 
 130                 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 131                     cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 132                     GETATTR3res *, resp);
 133 
 134                 VN_RELE(vp);
 135 
 136                 return;
 137         }
 138 
 139 out:
 140         if (curthread->t_flag & T_WOULDBLOCK) {
 141                 curthread->t_flag &= ~T_WOULDBLOCK;
 142                 resp->status = NFS3ERR_JUKEBOX;
 143         } else
 144                 resp->status = puterrno3(error);
 145 
 146         DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 147             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 148             GETATTR3res *, resp);
 149 
 150         if (vp != NULL)
 151                 VN_RELE(vp);
 152 }
 153 
 154 void *
 155 rfs3_getattr_getfh(GETATTR3args *args)
 156 {
 157         return (&args->object);
 158 }
 159 
 160 void
 161 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 162     struct svc_req *req, cred_t *cr, bool_t ro)
 163 {
 164         int error;
 165         vnode_t *vp;
 166         struct vattr *bvap;
 167         struct vattr bva;
 168         struct vattr *avap;
 169         struct vattr ava;
 170         int flag;
 171         int in_crit = 0;
 172         struct flock64 bf;
 173         caller_context_t ct;
 174 
 175         bvap = NULL;
 176         avap = NULL;
 177 
 178         vp = nfs3_fhtovp(&args->object, exi);
 179 
 180         DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
 181             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 182             SETATTR3args *, args);
 183 
 184         if (vp == NULL) {
 185                 error = ESTALE;
 186                 goto out;
 187         }
 188 
 189         error = sattr3_to_vattr(&args->new_attributes, &ava);
 190         if (error)
 191                 goto out;
 192 
 193         if (is_system_labeled()) {
 194                 bslabel_t *clabel = req->rq_label;
 195 
 196                 ASSERT(clabel != NULL);
 197                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 198                     "got client label from request(1)", struct svc_req *, req);
 199 
 200                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 201                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 202                             exi)) {
 203                                 resp->status = NFS3ERR_ACCES;
 204                                 goto out1;
 205                         }
 206                 }
 207         }
 208 
 209         /*
 210          * We need to specially handle size changes because of
 211          * possible conflicting NBMAND locks. Get into critical
 212          * region before VOP_GETATTR, so the size attribute is
 213          * valid when checking conflicts.
 214          *
 215          * Also, check to see if the v4 side of the server has
 216          * delegated this file.  If so, then we return JUKEBOX to
 217          * allow the client to retrasmit its request.
 218          */
 219         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 220                 if (nbl_need_check(vp)) {
 221                         nbl_start_crit(vp, RW_READER);
 222                         in_crit = 1;
 223                 }
 224         }
 225 
 226         bva.va_mask = AT_ALL;
 227         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 228 
 229         /*
 230          * If we can't get the attributes, then we can't do the
 231          * right access checking.  So, we'll fail the request.
 232          */
 233         if (error)
 234                 goto out;
 235 
 236         bvap = &bva;
 237 
 238         if (rdonly(ro, vp)) {
 239                 resp->status = NFS3ERR_ROFS;
 240                 goto out1;
 241         }
 242 
 243         if (args->guard.check &&
 244             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 245             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 246                 resp->status = NFS3ERR_NOT_SYNC;
 247                 goto out1;
 248         }
 249 
 250         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 251                 flag = ATTR_UTIME;
 252         else
 253                 flag = 0;
 254 
 255         /*
 256          * If the filesystem is exported with nosuid, then mask off
 257          * the setuid and setgid bits.
 258          */
 259         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 260             (exi->exi_export.ex_flags & EX_NOSUID))
 261                 ava.va_mode &= ~(VSUID | VSGID);
 262 
 263         ct.cc_sysid = 0;
 264         ct.cc_pid = 0;
 265         ct.cc_caller_id = nfs3_srv_caller_id;
 266         ct.cc_flags = CC_DONTBLOCK;
 267 
 268         /*
 269          * We need to specially handle size changes because it is
 270          * possible for the client to create a file with modes
 271          * which indicate read-only, but with the file opened for
 272          * writing.  If the client then tries to set the size of
 273          * the file, then the normal access checking done in
 274          * VOP_SETATTR would prevent the client from doing so,
 275          * although it should be legal for it to do so.  To get
 276          * around this, we do the access checking for ourselves
 277          * and then use VOP_SPACE which doesn't do the access
 278          * checking which VOP_SETATTR does. VOP_SPACE can only
 279          * operate on VREG files, let VOP_SETATTR handle the other
 280          * extremely rare cases.
 281          * Also the client should not be allowed to change the
 282          * size of the file if there is a conflicting non-blocking
 283          * mandatory lock in the region the change.
 284          */
 285         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 286                 if (in_crit) {
 287                         u_offset_t offset;
 288                         ssize_t length;
 289 
 290                         if (ava.va_size < bva.va_size) {
 291                                 offset = ava.va_size;
 292                                 length = bva.va_size - ava.va_size;
 293                         } else {
 294                                 offset = bva.va_size;
 295                                 length = ava.va_size - bva.va_size;
 296                         }
 297                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 298                             NULL)) {
 299                                 error = EACCES;
 300                                 goto out;
 301                         }
 302                 }
 303 
 304                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 305                         ava.va_mask &= ~AT_SIZE;
 306                         bf.l_type = F_WRLCK;
 307                         bf.l_whence = 0;
 308                         bf.l_start = (off64_t)ava.va_size;
 309                         bf.l_len = 0;
 310                         bf.l_sysid = 0;
 311                         bf.l_pid = 0;
 312                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 313                             (offset_t)ava.va_size, cr, &ct);
 314                 }
 315         }
 316 
 317         if (!error && ava.va_mask)
 318                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 319 
 320         /* check if a monitor detected a delegation conflict */
 321         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 322                 resp->status = NFS3ERR_JUKEBOX;
 323                 goto out1;
 324         }
 325 
 326         ava.va_mask = AT_ALL;
 327         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 328 
 329         /*
 330          * Force modified metadata out to stable storage.
 331          */
 332         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 333 
 334         if (error)
 335                 goto out;
 336 
 337         if (in_crit)
 338                 nbl_end_crit(vp);
 339 
 340         resp->status = NFS3_OK;
 341         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 342 
 343         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 344             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 345             SETATTR3res *, resp);
 346 
 347         VN_RELE(vp);
 348 
 349         return;
 350 
 351 out:
 352         if (curthread->t_flag & T_WOULDBLOCK) {
 353                 curthread->t_flag &= ~T_WOULDBLOCK;
 354                 resp->status = NFS3ERR_JUKEBOX;
 355         } else
 356                 resp->status = puterrno3(error);
 357 out1:
 358         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 359             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 360             SETATTR3res *, resp);
 361 
 362         if (vp != NULL) {
 363                 if (in_crit)
 364                         nbl_end_crit(vp);
 365                 VN_RELE(vp);
 366         }
 367         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 368 }
 369 
 370 void *
 371 rfs3_setattr_getfh(SETATTR3args *args)
 372 {
 373         return (&args->object);
 374 }
 375 
 376 /* ARGSUSED */
 377 void
 378 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 379     struct svc_req *req, cred_t *cr, bool_t ro)
 380 {
 381         int error;
 382         vnode_t *vp;
 383         vnode_t *dvp;
 384         struct vattr *vap;
 385         struct vattr va;
 386         struct vattr *dvap;
 387         struct vattr dva;
 388         nfs_fh3 *fhp;
 389         struct sec_ol sec = {0, 0};
 390         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 391         struct sockaddr *ca;
 392         char *name = NULL;
 393 
 394         dvap = NULL;
 395 
 396         if (exi != NULL)
 397                 exi_hold(exi);
 398 
 399         /*
 400          * Allow lookups from the root - the default
 401          * location of the public filehandle.
 402          */
 403         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 404                 dvp = ZONE_ROOTVP();
 405                 VN_HOLD(dvp);
 406 
 407                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 408                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 409                     LOOKUP3args *, args);
 410         } else {
 411                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 412 
 413                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 414                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 415                     LOOKUP3args *, args);
 416 
 417                 if (dvp == NULL) {
 418                         error = ESTALE;
 419                         goto out;
 420                 }
 421         }
 422 
 423         dva.va_mask = AT_ALL;
 424         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 425 
 426         if (args->what.name == nfs3nametoolong) {
 427                 resp->status = NFS3ERR_NAMETOOLONG;
 428                 goto out1;
 429         }
 430 
 431         if (args->what.name == NULL || *(args->what.name) == '\0') {
 432                 resp->status = NFS3ERR_ACCES;
 433                 goto out1;
 434         }
 435 
 436         fhp = &args->what.dir;
 437         if (strcmp(args->what.name, "..") == 0 &&
 438             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 439                 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 440                     (dvp->v_flag & VROOT)) {
 441                         /*
 442                          * special case for ".." and 'nohide'exported root
 443                          */
 444                         if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 445                                 resp->status = NFS3ERR_ACCES;
 446                                 goto out1;
 447                         }
 448                 } else {
 449                         resp->status = NFS3ERR_NOENT;
 450                         goto out1;
 451                 }
 452         }
 453 
 454         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 455         name = nfscmd_convname(ca, exi, args->what.name,
 456             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 457 
 458         if (name == NULL) {
 459                 resp->status = NFS3ERR_ACCES;
 460                 goto out1;
 461         }
 462 
 463         /*
 464          * If the public filehandle is used then allow
 465          * a multi-component lookup
 466          */
 467         if (PUBLIC_FH3(&args->what.dir)) {
 468                 publicfh_flag = TRUE;
 469 
 470                 exi_rele(&exi);
 471 
 472                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 473                     &exi, &sec);
 474 
 475                 /*
 476                  * Since WebNFS may bypass MOUNT, we need to ensure this
 477                  * request didn't come from an unlabeled admin_low client.
 478                  */
 479                 if (is_system_labeled() && error == 0) {
 480                         int             addr_type;
 481                         void            *ipaddr;
 482                         tsol_tpc_t      *tp;
 483 
 484                         if (ca->sa_family == AF_INET) {
 485                                 addr_type = IPV4_VERSION;
 486                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 487                         } else if (ca->sa_family == AF_INET6) {
 488                                 addr_type = IPV6_VERSION;
 489                                 ipaddr = &((struct sockaddr_in6 *)
 490                                     ca)->sin6_addr;
 491                         }
 492                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 493                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 494                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 495                             SUN_CIPSO) {
 496                                 VN_RELE(vp);
 497                                 error = EACCES;
 498                         }
 499                         if (tp != NULL)
 500                                 TPC_RELE(tp);
 501                 }
 502         } else {
 503                 error = VOP_LOOKUP(dvp, name, &vp,
 504                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 505         }
 506 
 507         if (name != args->what.name)
 508                 kmem_free(name, MAXPATHLEN + 1);
 509 
 510         if (error == 0 && vn_ismntpt(vp)) {
 511                 error = rfs_cross_mnt(&vp, &exi);
 512                 if (error)
 513                         VN_RELE(vp);
 514         }
 515 
 516         if (is_system_labeled() && error == 0) {
 517                 bslabel_t *clabel = req->rq_label;
 518 
 519                 ASSERT(clabel != NULL);
 520                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 521                     "got client label from request(1)", struct svc_req *, req);
 522 
 523                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 524                         if (!do_rfs_label_check(clabel, dvp,
 525                             DOMINANCE_CHECK, exi)) {
 526                                 VN_RELE(vp);
 527                                 error = EACCES;
 528                         }
 529                 }
 530         }
 531 
 532         dva.va_mask = AT_ALL;
 533         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 534 
 535         if (error)
 536                 goto out;
 537 
 538         if (sec.sec_flags & SEC_QUERY) {
 539                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 540         } else {
 541                 error = makefh3(&resp->resok.object, vp, exi);
 542                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 543                         auth_weak = TRUE;
 544         }
 545 
 546         if (error) {
 547                 VN_RELE(vp);
 548                 goto out;
 549         }
 550 
 551         va.va_mask = AT_ALL;
 552         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 553 
 554         exi_rele(&exi);
 555         VN_RELE(vp);
 556 
 557         resp->status = NFS3_OK;
 558         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 559         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 560 
 561         /*
 562          * If it's public fh, no 0x81, and client's flavor is
 563          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 564          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 565          */
 566         if (auth_weak)
 567                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 568 
 569         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 570             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 571             LOOKUP3res *, resp);
 572         VN_RELE(dvp);
 573 
 574         return;
 575 
 576 out:
 577         if (curthread->t_flag & T_WOULDBLOCK) {
 578                 curthread->t_flag &= ~T_WOULDBLOCK;
 579                 resp->status = NFS3ERR_JUKEBOX;
 580         } else
 581                 resp->status = puterrno3(error);
 582 out1:
 583         if (exi != NULL)
 584                 exi_rele(&exi);
 585 
 586         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 587             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 588             LOOKUP3res *, resp);
 589 
 590         if (dvp != NULL)
 591                 VN_RELE(dvp);
 592         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 593 
 594 }
 595 
 596 void *
 597 rfs3_lookup_getfh(LOOKUP3args *args)
 598 {
 599         return (&args->what.dir);
 600 }
 601 
 602 void
 603 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 604     struct svc_req *req, cred_t *cr, bool_t ro)
 605 {
 606         int error;
 607         vnode_t *vp;
 608         struct vattr *vap;
 609         struct vattr va;
 610         int checkwriteperm;
 611         boolean_t dominant_label = B_FALSE;
 612         boolean_t equal_label = B_FALSE;
 613         boolean_t admin_low_client;
 614 
 615         vap = NULL;
 616 
 617         vp = nfs3_fhtovp(&args->object, exi);
 618 
 619         DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
 620             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 621             ACCESS3args *, args);
 622 
 623         if (vp == NULL) {
 624                 error = ESTALE;
 625                 goto out;
 626         }
 627 
 628         /*
 629          * If the file system is exported read only, it is not appropriate
 630          * to check write permissions for regular files and directories.
 631          * Special files are interpreted by the client, so the underlying
 632          * permissions are sent back to the client for interpretation.
 633          */
 634         if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
 635                 checkwriteperm = 0;
 636         else
 637                 checkwriteperm = 1;
 638 
 639         /*
 640          * We need the mode so that we can correctly determine access
 641          * permissions relative to a mandatory lock file.  Access to
 642          * mandatory lock files is denied on the server, so it might
 643          * as well be reflected to the server during the open.
 644          */
 645         va.va_mask = AT_MODE;
 646         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 647         if (error)
 648                 goto out;
 649 
 650         vap = &va;
 651 
 652         resp->resok.access = 0;
 653 
 654         if (is_system_labeled()) {
 655                 bslabel_t *clabel = req->rq_label;
 656 
 657                 ASSERT(clabel != NULL);
 658                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 659                     "got client label from request(1)", struct svc_req *, req);
 660 
 661                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 662                         if ((equal_label = do_rfs_label_check(clabel, vp,
 663                             EQUALITY_CHECK, exi)) == B_FALSE) {
 664                                 dominant_label = do_rfs_label_check(clabel,
 665                                     vp, DOMINANCE_CHECK, exi);
 666                         } else
 667                                 dominant_label = B_TRUE;
 668                         admin_low_client = B_FALSE;
 669                 } else
 670                         admin_low_client = B_TRUE;
 671         }
 672 
 673         if (args->access & ACCESS3_READ) {
 674                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 675                 if (error) {
 676                         if (curthread->t_flag & T_WOULDBLOCK)
 677                                 goto out;
 678                 } else if (!MANDLOCK(vp, va.va_mode) &&
 679                     (!is_system_labeled() || admin_low_client ||
 680                     dominant_label))
 681                         resp->resok.access |= ACCESS3_READ;
 682         }
 683         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 684                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 685                 if (error) {
 686                         if (curthread->t_flag & T_WOULDBLOCK)
 687                                 goto out;
 688                 } else if (!is_system_labeled() || admin_low_client ||
 689                     dominant_label)
 690                         resp->resok.access |= ACCESS3_LOOKUP;
 691         }
 692         if (checkwriteperm &&
 693             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 694                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 695                 if (error) {
 696                         if (curthread->t_flag & T_WOULDBLOCK)
 697                                 goto out;
 698                 } else if (!MANDLOCK(vp, va.va_mode) &&
 699                     (!is_system_labeled() || admin_low_client || equal_label)) {
 700                         resp->resok.access |=
 701                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 702                 }
 703         }
 704         if (checkwriteperm &&
 705             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 706                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 707                 if (error) {
 708                         if (curthread->t_flag & T_WOULDBLOCK)
 709                                 goto out;
 710                 } else if (!is_system_labeled() || admin_low_client ||
 711                     equal_label)
 712                         resp->resok.access |= ACCESS3_DELETE;
 713         }
 714         if (args->access & ACCESS3_EXECUTE) {
 715                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 716                 if (error) {
 717                         if (curthread->t_flag & T_WOULDBLOCK)
 718                                 goto out;
 719                 } else if (!MANDLOCK(vp, va.va_mode) &&
 720                     (!is_system_labeled() || admin_low_client ||
 721                     dominant_label))
 722                         resp->resok.access |= ACCESS3_EXECUTE;
 723         }
 724 
 725         va.va_mask = AT_ALL;
 726         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 727 
 728         resp->status = NFS3_OK;
 729         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 730 
 731         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 732             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 733             ACCESS3res *, resp);
 734 
 735         VN_RELE(vp);
 736 
 737         return;
 738 
 739 out:
 740         if (curthread->t_flag & T_WOULDBLOCK) {
 741                 curthread->t_flag &= ~T_WOULDBLOCK;
 742                 resp->status = NFS3ERR_JUKEBOX;
 743         } else
 744                 resp->status = puterrno3(error);
 745         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 746             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 747             ACCESS3res *, resp);
 748         if (vp != NULL)
 749                 VN_RELE(vp);
 750         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 751 }
 752 
 753 void *
 754 rfs3_access_getfh(ACCESS3args *args)
 755 {
 756         return (&args->object);
 757 }
 758 
 759 /* ARGSUSED */
 760 void
 761 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 762     struct svc_req *req, cred_t *cr, bool_t ro)
 763 {
 764         int error;
 765         vnode_t *vp;
 766         struct vattr *vap;
 767         struct vattr va;
 768         struct iovec iov;
 769         struct uio uio;
 770         char *data;
 771         struct sockaddr *ca;
 772         char *name = NULL;
 773         int is_referral = 0;
 774 
 775         vap = NULL;
 776 
 777         vp = nfs3_fhtovp(&args->symlink, exi);
 778 
 779         DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
 780             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 781             READLINK3args *, args);
 782 
 783         if (vp == NULL) {
 784                 error = ESTALE;
 785                 goto out;
 786         }
 787 
 788         va.va_mask = AT_ALL;
 789         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 790         if (error)
 791                 goto out;
 792 
 793         vap = &va;
 794 
 795         /* We lied about the object type for a referral */
 796         if (vn_is_nfs_reparse(vp, cr))
 797                 is_referral = 1;
 798 
 799         if (vp->v_type != VLNK && !is_referral) {
 800                 resp->status = NFS3ERR_INVAL;
 801                 goto out1;
 802         }
 803 
 804         if (MANDLOCK(vp, va.va_mode)) {
 805                 resp->status = NFS3ERR_ACCES;
 806                 goto out1;
 807         }
 808 
 809         if (is_system_labeled()) {
 810                 bslabel_t *clabel = req->rq_label;
 811 
 812                 ASSERT(clabel != NULL);
 813                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 814                     "got client label from request(1)", struct svc_req *, req);
 815 
 816                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 817                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 818                             exi)) {
 819                                 resp->status = NFS3ERR_ACCES;
 820                                 goto out1;
 821                         }
 822                 }
 823         }
 824 
 825         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 826 
 827         if (is_referral) {
 828                 char *s;
 829                 size_t strsz;
 830 
 831                 /* Get an artificial symlink based on a referral */
 832                 s = build_symlink(vp, cr, &strsz);
 833                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 834                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 835                     vnode_t *, vp, char *, s);
 836                 if (s == NULL)
 837                         error = EINVAL;
 838                 else {
 839                         error = 0;
 840                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 841                         kmem_free(s, strsz);
 842                 }
 843 
 844         } else {
 845 
 846                 iov.iov_base = data;
 847                 iov.iov_len = MAXPATHLEN;
 848                 uio.uio_iov = &iov;
 849                 uio.uio_iovcnt = 1;
 850                 uio.uio_segflg = UIO_SYSSPACE;
 851                 uio.uio_extflg = UIO_COPY_CACHED;
 852                 uio.uio_loffset = 0;
 853                 uio.uio_resid = MAXPATHLEN;
 854 
 855                 error = VOP_READLINK(vp, &uio, cr, NULL);
 856 
 857                 if (!error)
 858                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 859         }
 860 
 861         va.va_mask = AT_ALL;
 862         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 863 
 864         /* Lie about object type again just to be consistent */
 865         if (is_referral && vap != NULL)
 866                 vap->va_type = VLNK;
 867 
 868 #if 0 /* notyet */
 869         /*
 870          * Don't do this.  It causes local disk writes when just
 871          * reading the file and the overhead is deemed larger
 872          * than the benefit.
 873          */
 874         /*
 875          * Force modified metadata out to stable storage.
 876          */
 877         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 878 #endif
 879 
 880         if (error) {
 881                 kmem_free(data, MAXPATHLEN + 1);
 882                 goto out;
 883         }
 884 
 885         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 886         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 887             MAXPATHLEN + 1);
 888 
 889         if (name == NULL) {
 890                 /*
 891                  * Even though the conversion failed, we return
 892                  * something. We just don't translate it.
 893                  */
 894                 name = data;
 895         }
 896 
 897         resp->status = NFS3_OK;
 898         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 899         resp->resok.data = name;
 900 
 901         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 902             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 903             READLINK3res *, resp);
 904         VN_RELE(vp);
 905 
 906         if (name != data)
 907                 kmem_free(data, MAXPATHLEN + 1);
 908 
 909         return;
 910 
 911 out:
 912         if (curthread->t_flag & T_WOULDBLOCK) {
 913                 curthread->t_flag &= ~T_WOULDBLOCK;
 914                 resp->status = NFS3ERR_JUKEBOX;
 915         } else
 916                 resp->status = puterrno3(error);
 917 out1:
 918         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 919             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 920             READLINK3res *, resp);
 921         if (vp != NULL)
 922                 VN_RELE(vp);
 923         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 924 }
 925 
 926 void *
 927 rfs3_readlink_getfh(READLINK3args *args)
 928 {
 929         return (&args->symlink);
 930 }
 931 
 932 void
 933 rfs3_readlink_free(READLINK3res *resp)
 934 {
 935         if (resp->status == NFS3_OK)
 936                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 937 }
 938 
/*
 * Server routine to handle the NFS version 3 READ procedure.
 * May handle RDMA data as well as mblks.
 *
 * args - READ3 arguments: file handle, offset, count and (for RDMA) the
 *        write chunk list.  args->count may be reduced in place to the
 *        value returned by rfs3_tsize(req).
 * resp - READ3 result to fill in.  resp->status is always set; resok is
 *        filled in on success and resfail.file_attributes on failure.
 * exi  - export information used to translate the file handle and for the
 *        label check.
 * req  - the RPC service request.
 * cr   - credentials used for all vnode operations.
 * ro   - not referenced by this routine.
 *
 * Exit paths:
 *   done - success; the rwlock and the nbmand critical region have
 *          already been dropped by the code that jumps or falls here.
 *   out  - failure with an errno in "error"; it is mapped to an NFS3
 *          status (or NFS3ERR_JUKEBOX if T_WOULDBLOCK is set on the
 *          current thread) and then falls into out1.
 *   out1 - failure with resp->status already set; drops the rwlock and
 *          critical region if held and releases the vnode.
 */
/* ARGSUSED */
void
rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        int error;
        vnode_t *vp;
        struct vattr *vap;
        struct vattr va;
        struct iovec iov, *iovp = NULL;
        int iovcnt;
        struct uio uio;
        u_offset_t offset;
        mblk_t *mp = NULL;
        int in_crit = 0;
        int need_rwunlock = 0;
        caller_context_t ct;
        int rdma_used = 0;
        int loaned_buffers;
        struct uio *uiop;

        /* No attributes to report until VOP_GETATTR succeeds. */
        vap = NULL;

        vp = nfs3_fhtovp(&args->file, exi);

        DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            READ3args *, args);


        if (vp == NULL) {
                error = ESTALE;
                goto out;
        }

        /*
         * A write chunk list means the reply data goes out via RDMA;
         * the chunk list must be able to hold the requested count.
         */
        if (args->wlist) {
                if (args->count > clist_len(args->wlist)) {
                        error = EINVAL;
                        goto out;
                }
                rdma_used = 1;
        }

        /* use loaned buffers for TCP */
        loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                /*
                 * The label check is skipped when the client label equals
                 * the admin_low label.
                 */
                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
                            exi)) {
                                resp->status = NFS3ERR_ACCES;
                                goto out1;
                        }
                }
        }

        /* Caller context passed to every VOP below; ask not to block. */
        ct.cc_sysid = 0;
        ct.cc_pid = 0;
        ct.cc_caller_id = nfs3_srv_caller_id;
        ct.cc_flags = CC_DONTBLOCK;

        /*
         * Enter the critical region before calling VOP_RWLOCK
         * to avoid a deadlock with write requests.
         */
        if (nbl_need_check(vp)) {
                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
                    NULL)) {
                        error = EACCES;
                        goto out;
                }
        }

        error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                /* need_rwunlock is still 0, so out1 will not unlock. */
                resp->status = NFS3ERR_JUKEBOX;
                goto out1;
        }

        /*
         * From here on every exit must drop the rwlock.  Any other
         * return value from VOP_RWLOCK is not examined; "error" is
         * overwritten by the VOP_GETATTR call below.
         */
        need_rwunlock = 1;

        va.va_mask = AT_ALL;
        error = VOP_GETATTR(vp, &va, 0, cr, &ct);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto out;

        vap = &va;

        if (vp->v_type != VREG) {
                resp->status = NFS3ERR_INVAL;
                goto out1;
        }

        /*
         * The access check is skipped when the caller's uid matches the
         * file's owner.  Otherwise read permission is required, with
         * execute permission accepted as a fallback (presumably so that
         * execute-only files can still be fetched -- confirm).  A
         * T_WOULDBLOCK failure is reported immediately without trying
         * the fallback.
         */
        if (crgetuid(cr) != va.va_uid) {
                error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
                if (error) {
                        if (curthread->t_flag & T_WOULDBLOCK)
                                goto out;
                        error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
                        if (error)
                                goto out;
                }
        }

        if (MANDLOCK(vp, va.va_mode)) {
                resp->status = NFS3ERR_ACCES;
                goto out1;
        }

        /*
         * Reading at or beyond the current file size: succeed with no
         * data and eof set.  The lock and critical region are dropped
         * here because the "done" path does not do it.
         */
        offset = args->offset;
        if (offset >= va.va_size) {
                VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                resp->status = NFS3_OK;
                vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
                resp->resok.count = 0;
                resp->resok.eof = TRUE;
                resp->resok.data.data_len = 0;
                resp->resok.data.data_val = NULL;
                resp->resok.data.mp = NULL;
                /* RDMA */
                resp->resok.wlist = args->wlist;
                resp->resok.wlist_len = resp->resok.count;
                if (resp->resok.wlist)
                        clist_zero_len(resp->resok.wlist);
                goto done;
        }

        /*
         * Zero-length read inside the file: succeed with no data and
         * eof clear.  Same unlock-before-done handling as above.
         */
        if (args->count == 0) {
                VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                resp->status = NFS3_OK;
                vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
                resp->resok.count = 0;
                resp->resok.eof = FALSE;
                resp->resok.data.data_len = 0;
                resp->resok.data.data_val = NULL;
                resp->resok.data.mp = NULL;
                /* RDMA */
                resp->resok.wlist = args->wlist;
                resp->resok.wlist_len = resp->resok.count;
                if (resp->resok.wlist)
                        clist_zero_len(resp->resok.wlist);
                goto done;
        }

        /*
         * Do not allocate more memory than the maximum allowed
         * transfer size.
         */
        if (args->count > rfs3_tsize(req))
                args->count = rfs3_tsize(req);

        if (loaned_buffers) {
                uiop = (uio_t *)rfs_setup_xuio(vp);
                ASSERT(uiop != NULL);
                uiop->uio_segflg = UIO_SYSSPACE;
                uiop->uio_loffset = args->offset;
                uiop->uio_resid = args->count;

                /* Jump to do the read if successful */
                if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
                        /*
                         * Need to hold the vnode until after VOP_RETZCBUF()
                         * is called.
                         */
                        VN_HOLD(vp);
                        goto doio_read;
                }

                DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
                    uiop->uio_loffset, int, uiop->uio_resid);

                /*
                 * Zero-copy setup failed: clear uio_extflg, free the
                 * xuio and fall back to the ordinary copy path below.
                 */
                uiop->uio_extflg = 0;
                /* failure to setup for zero copy */
                rfs_free_xuio((void *)uiop);
                loaned_buffers = 0;
        }

        /*
         * If returning data via RDMA Write, then grab the chunk list.
         * If we aren't returning READ data w/RDMA_WRITE, then grab
         * a mblk.
         */
        if (rdma_used) {
                (void) rdma_get_wchunk(req, &iov, args->wlist);
                uio.uio_iov = &iov;
                uio.uio_iovcnt = 1;
        } else {
                /*
                 * mp will contain the data to be sent out in the read reply.
                 * For UDP, this will be freed after the reply has been sent
                 * out by the driver.  For TCP, it will be freed after the last
                 * segment associated with the reply has been ACKed by the
                 * client.
                 */
                mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
                uio.uio_iov = iovp;
                uio.uio_iovcnt = iovcnt;
        }

        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_CACHED;
        uio.uio_loffset = args->offset;
        uio.uio_resid = args->count;
        uiop = &uio;

doio_read:
        /* uiop is either the loaned-buffer xuio or the local uio. */
        error = VOP_READ(vp, uiop, 0, cr, &ct);

        if (error) {
                /*
                 * NOTE(review): when this is reached via the loaned-buffer
                 * path, mp is still NULL and neither the xuio nor the
                 * extra VN_HOLD taken above is released in this function.
                 * Confirm whether that cleanup happens elsewhere.
                 */
                if (mp)
                        freemsg(mp);
                /* check if a monitor detected a delegation conflict */
                if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                        resp->status = NFS3ERR_JUKEBOX;
                        goto out1;
                }
                goto out;
        }

        /* make mblk using zc buffers */
        if (loaned_buffers) {
                mp = uio_to_mblk(uiop);
                ASSERT(mp != NULL);
        }

        /*
         * Refresh the attributes for the reply.  A failure here does not
         * fail the read; the reply just carries no attributes and eof is
         * reported as FALSE.
         */
        va.va_mask = AT_ALL;
        error = VOP_GETATTR(vp, &va, 0, cr, &ct);

        if (error)
                vap = NULL;
        else
                vap = &va;

        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);

        if (in_crit)
                nbl_end_crit(vp);

        resp->status = NFS3_OK;
        vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
        /* Bytes actually read = requested minus what VOP_READ left over. */
        resp->resok.count = args->count - uiop->uio_resid;
        if (!error && offset + resp->resok.count == va.va_size)
                resp->resok.eof = TRUE;
        else
                resp->resok.eof = FALSE;
        resp->resok.data.data_len = resp->resok.count;

        if (mp)
                rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);

        resp->resok.data.mp = mp;
        resp->resok.size = (uint_t)args->count;

        if (rdma_used) {
                resp->resok.data.data_val = (caddr_t)iov.iov_base;
                /*
                 * NOTE(review): on failure only the status is changed;
                 * the rest of resok stays filled in and control still
                 * falls through to "done".
                 */
                if (!rdma_setup_read_data3(args, &(resp->resok))) {
                        resp->status = NFS3ERR_INVAL;
                }
        } else {
                resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
                (resp->resok).wlist = NULL;
        }

done:
        DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            READ3res *, resp);

        VN_RELE(vp);

        /* iovp is only non-NULL when rfs_read_alloc() filled it in. */
        if (iovp != NULL)
                kmem_free(iovp, iovcnt * sizeof (struct iovec));

        return;

out:
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
out1:
        DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            READ3res *, resp);

        /* vp is NULL when the file handle could not be translated. */
        if (vp != NULL) {
                if (need_rwunlock)
                        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
        vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);

        if (iovp != NULL)
                kmem_free(iovp, iovcnt * sizeof (struct iovec));
}
1260 
1261 void
1262 rfs3_read_free(READ3res *resp)
1263 {
1264         mblk_t *mp;
1265 
1266         if (resp->status == NFS3_OK) {
1267                 mp = resp->resok.data.mp;
1268                 if (mp != NULL)
1269                         freemsg(mp);
1270         }
1271 }
1272 
1273 void *
1274 rfs3_read_getfh(READ3args *args)
1275 {
1276         return (&args->file);
1277 }
1278 
/*
 * Number of iovec entries rfs3_write() keeps in its on-stack array.  An
 * incoming mblk chain with more segments than this gets a kmem_alloc'ed
 * iovec array instead.
 */
#define MAX_IOVECS      12

#ifdef DEBUG
/*
 * Counters bumped by rfs3_write() in DEBUG builds: "hits" when the on-stack
 * iovec array was large enough, "misses" when an array had to be allocated.
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1285 
/*
 * Server routine to handle the NFS version 3 WRITE procedure.
 *
 * args - WRITE3 arguments: file handle, offset, count, stability level and
 *        the data, supplied as an mblk chain (args->mblk), an RDMA chunk
 *        list (args->rlist) or a flat buffer (args->data).
 * resp - WRITE3 result to fill in.  resp->status is always set; resok is
 *        filled in on success and resfail.file_wcc on failure.
 * exi  - export information used to translate the file handle and for the
 *        label check.
 * req  - the RPC service request.
 * cr   - credentials used for all vnode operations; also installed as the
 *        thread credential for the duration of VOP_WRITE.
 * ro   - passed to rdonly() to decide whether the write must be refused
 *        with NFS3ERR_ROFS.
 *
 * Exit paths:
 *   err  - failure with an errno in "error"; mapped to an NFS3 status (or
 *          NFS3ERR_JUKEBOX if T_WOULDBLOCK is set), then falls into err1.
 *   err1 - failure with resp->status already set; fills in the failure
 *          wcc data, then falls into out.
 *   out  - common cleanup for success and failure: drops the rwlock if
 *          rwlock_ret says it was taken, leaves the nbmand critical
 *          region and releases the vnode.
 */
void
rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        nfs3_srv_t *ns;
        int error;
        vnode_t *vp;
        struct vattr *bvap = NULL;
        struct vattr bva;
        struct vattr *avap = NULL;
        struct vattr ava;
        u_offset_t rlimit;
        struct uio uio;
        struct iovec iov[MAX_IOVECS];
        mblk_t *m;
        struct iovec *iovp;
        int iovcnt;
        int ioflag;
        cred_t *savecred;
        int in_crit = 0;
        /* -1 means "rwlock not held"; see the check at the out label. */
        int rwlock_ret = -1;
        caller_context_t ct;

        vp = nfs3_fhtovp(&args->file, exi);

        DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            WRITE3args *, args);

        if (vp == NULL) {
                error = ESTALE;
                goto err;
        }

        /* Per-zone server state; supplies the write verifier for replies. */
        ns = zone_getspecific(rfs3_zone_key, curzone);
        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                /*
                 * The label check is skipped when the client label equals
                 * the admin_low label.
                 */
                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
                            exi)) {
                                resp->status = NFS3ERR_ACCES;
                                goto err1;
                        }
                }
        }

        /* Caller context passed to every VOP below; ask not to block. */
        ct.cc_sysid = 0;
        ct.cc_pid = 0;
        ct.cc_caller_id = nfs3_srv_caller_id;
        ct.cc_flags = CC_DONTBLOCK;

        /*
         * We have to enter the critical region before calling VOP_RWLOCK
         * to avoid a deadlock with ufs.
         */
        if (nbl_need_check(vp)) {
                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
                    NULL)) {
                        error = EACCES;
                        goto err;
                }
        }

        rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

        /* check if a monitor detected a delegation conflict */
        if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                /* Reset so the cleanup code does not call VOP_RWUNLOCK. */
                rwlock_ret = -1;
                goto err1;
        }


        bva.va_mask = AT_ALL;
        error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto err;

        /*
         * Until the write happens the "after" attributes are the same as
         * the "before" attributes; failures below report both.
         */
        bvap = &bva;
        avap = bvap;

        if (args->count != args->data.data_len) {
                resp->status = NFS3ERR_INVAL;
                goto err1;
        }

        if (rdonly(ro, vp)) {
                resp->status = NFS3ERR_ROFS;
                goto err1;
        }

        if (vp->v_type != VREG) {
                resp->status = NFS3ERR_INVAL;
                goto err1;
        }

        /* The access check is skipped when the caller owns the file. */
        if (crgetuid(cr) != bva.va_uid &&
            (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
                goto err;

        if (MANDLOCK(vp, bva.va_mode)) {
                resp->status = NFS3ERR_ACCES;
                goto err1;
        }

        /* Zero-length write: report success without touching the file. */
        if (args->count == 0) {
                resp->status = NFS3_OK;
                vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
                resp->resok.count = 0;
                resp->resok.committed = args->stable;
                resp->resok.verf = ns->write3verf;
                goto out;
        }

        /*
         * Build the iovec array describing the data to write.  An mblk
         * chain gets one iovec per mblk, using the on-stack array when it
         * fits and an allocated one otherwise; the RDMA and flat-buffer
         * cases need a single iovec.
         */
        if (args->mblk != NULL) {
                iovcnt = 0;
                for (m = args->mblk; m != NULL; m = m->b_cont)
                        iovcnt++;
                if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
                        rfs3_write_hits++;
#endif
                        iovp = iov;
                } else {
#ifdef DEBUG
                        rfs3_write_misses++;
#endif
                        iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
                }
                mblk_to_iov(args->mblk, iovcnt, iovp);

        } else if (args->rlist != NULL) {
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
                iovp->iov_len = args->count;
        } else {
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = args->data.data_val;
                iovp->iov_len = args->count;
        }

        uio.uio_iov = iovp;
        uio.uio_iovcnt = iovcnt;

        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_DEFAULT;
        uio.uio_loffset = args->offset;
        uio.uio_resid = args->count;
        /*
         * Trim the request so it does not extend past uio_llimit, which is
         * taken from curproc->p_fsz_ctl.
         * NOTE(review): rlimit is unsigned, so if args->offset is already
         * beyond uio_llimit the subtraction wraps and no trimming occurs;
         * presumably VOP_WRITE rejects that case -- confirm.
         */
        uio.uio_llimit = curproc->p_fsz_ctl;
        rlimit = uio.uio_llimit - args->offset;
        if (rlimit < (u_offset_t)uio.uio_resid)
                uio.uio_resid = (int)rlimit;

        /* Translate the requested stability level into write I/O flags. */
        if (args->stable == UNSTABLE)
                ioflag = 0;
        else if (args->stable == FILE_SYNC)
                ioflag = FSYNC;
        else if (args->stable == DATA_SYNC)
                ioflag = FDSYNC;
        else {
                /* Unknown stability level: free any allocated iovecs. */
                if (iovp != iov)
                        kmem_free(iovp, sizeof (*iovp) * iovcnt);
                resp->status = NFS3ERR_INVAL;
                goto err1;
        }

        /*
         * We're changing creds because VM may fault and we need
         * the cred of the current thread to be used if quota
         * checking is enabled.
         */
        savecred = curthread->t_cred;
        curthread->t_cred = cr;
        error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
        curthread->t_cred = savecred;

        /* Only a kmem_alloc'ed array needs freeing, not the stack one. */
        if (iovp != iov)
                kmem_free(iovp, sizeof (*iovp) * iovcnt);

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                goto err1;
        }

        /*
         * Fetch the post-write attributes before looking at the write
         * error so that a failed write still reports them.
         */
        ava.va_mask = AT_ALL;
        avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;

        if (error)
                goto err;

        /*
         * If we were unable to get the V_WRITELOCK_TRUE, then we
         * may not have accurate after attrs, so check if
         * we have both attributes, they have a non-zero va_seq, and
         * va_seq has changed by exactly one,
         * if not, turn off the before attr.
         */
        if (rwlock_ret != V_WRITELOCK_TRUE) {
                if (bvap == NULL || avap == NULL ||
                    bvap->va_seq == 0 || avap->va_seq == 0 ||
                    avap->va_seq != (bvap->va_seq + 1)) {
                        bvap = NULL;
                }
        }

        resp->status = NFS3_OK;
        vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
        /* Bytes actually written = requested minus what was left over. */
        resp->resok.count = args->count - uio.uio_resid;
        resp->resok.committed = args->stable;
        resp->resok.verf = ns->write3verf;
        goto out;

err:
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
err1:
        vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
out:
        DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
            WRITE3res *, resp);

        /* vp is NULL when the file handle could not be translated. */
        if (vp != NULL) {
                if (rwlock_ret != -1)
                        VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
}
1534 
1535 void *
1536 rfs3_write_getfh(WRITE3args *args)
1537 {
1538         return (&args->file);
1539 }
1540 
1541 void
1542 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1543     struct svc_req *req, cred_t *cr, bool_t ro)
1544 {
1545         int error;
1546         int in_crit = 0;
1547         vnode_t *vp;
1548         vnode_t *tvp = NULL;
1549         vnode_t *dvp;
1550         struct vattr *vap;
1551         struct vattr va;
1552         struct vattr *dbvap;
1553         struct vattr dbva;
1554         struct vattr *davap;
1555         struct vattr dava;
1556         enum vcexcl excl;
1557         nfstime3 *mtime;
1558         len_t reqsize;
1559         bool_t trunc;
1560         struct sockaddr *ca;
1561         char *name = NULL;
1562 
1563         dbvap = NULL;
1564         davap = NULL;
1565 
1566         dvp = nfs3_fhtovp(&args->where.dir, exi);
1567 
1568         DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1569             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1570             CREATE3args *, args);
1571 
1572         if (dvp == NULL) {
1573                 error = ESTALE;
1574                 goto out;
1575         }
1576 
1577         dbva.va_mask = AT_ALL;
1578         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1579         davap = dbvap;
1580 
1581         if (args->where.name == nfs3nametoolong) {
1582                 resp->status = NFS3ERR_NAMETOOLONG;
1583                 goto out1;
1584         }
1585 
1586         if (args->where.name == NULL || *(args->where.name) == '\0') {
1587                 resp->status = NFS3ERR_ACCES;
1588                 goto out1;
1589         }
1590 
1591         if (rdonly(ro, dvp)) {
1592                 resp->status = NFS3ERR_ROFS;
1593                 goto out1;
1594         }
1595 
1596         if (protect_zfs_mntpt(dvp) != 0) {
1597                 resp->status = NFS3ERR_ACCES;
1598                 goto out1;
1599         }
1600 
1601         if (is_system_labeled()) {
1602                 bslabel_t *clabel = req->rq_label;
1603 
1604                 ASSERT(clabel != NULL);
1605                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1606                     "got client label from request(1)", struct svc_req *, req);
1607 
1608                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1609                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1610                             exi)) {
1611                                 resp->status = NFS3ERR_ACCES;
1612                                 goto out1;
1613                         }
1614                 }
1615         }
1616 
1617         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1618         name = nfscmd_convname(ca, exi, args->where.name,
1619             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1620 
1621         if (name == NULL) {
1622                 /* This is really a Solaris EILSEQ */
1623                 resp->status = NFS3ERR_INVAL;
1624                 goto out1;
1625         }
1626 
1627         if (args->how.mode == EXCLUSIVE) {
1628                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1629                 va.va_type = VREG;
1630                 va.va_mode = (mode_t)0;
1631                 /*
1632                  * Ensure no time overflows and that types match
1633                  */
1634                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1635                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1636                 va.va_mtime.tv_nsec = mtime->nseconds;
1637                 excl = EXCL;
1638         } else {
1639                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1640                     &va);
1641                 if (error)
1642                         goto out;
1643                 va.va_mask |= AT_TYPE;
1644                 va.va_type = VREG;
1645                 if (args->how.mode == GUARDED)
1646                         excl = EXCL;
1647                 else {
1648                         excl = NONEXCL;
1649 
1650                         /*
1651                          * During creation of file in non-exclusive mode
1652                          * if size of file is being set then make sure
1653                          * that if the file already exists that no conflicting
1654                          * non-blocking mandatory locks exists in the region
1655                          * being modified. If there are conflicting locks fail
1656                          * the operation with EACCES.
1657                          */
1658                         if (va.va_mask & AT_SIZE) {
1659                                 struct vattr tva;
1660 
1661                                 /*
1662                                  * Does file already exist?
1663                                  */
1664                                 error = VOP_LOOKUP(dvp, name, &tvp,
1665                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1666 
1667                                 /*
1668                                  * Check to see if the file has been delegated
1669                                  * to a v4 client.  If so, then begin recall of
1670                                  * the delegation and return JUKEBOX to allow
                                 * the client to retransmit its request.
1672                                  */
1673 
1674                                 trunc = va.va_size == 0;
1675                                 if (!error &&
1676                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1677                                         resp->status = NFS3ERR_JUKEBOX;
1678                                         goto out1;
1679                                 }
1680 
1681                                 /*
1682                                  * Check for NBMAND lock conflicts
1683                                  */
1684                                 if (!error && nbl_need_check(tvp)) {
1685                                         u_offset_t offset;
1686                                         ssize_t len;
1687 
1688                                         nbl_start_crit(tvp, RW_READER);
1689                                         in_crit = 1;
1690 
1691                                         tva.va_mask = AT_SIZE;
1692                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1693                                             NULL);
1694                                         /*
1695                                          * Can't check for conflicts, so return
1696                                          * error.
1697                                          */
1698                                         if (error)
1699                                                 goto out;
1700 
1701                                         offset = tva.va_size < va.va_size ?
1702                                             tva.va_size : va.va_size;
1703                                         len = tva.va_size < va.va_size ?
1704                                             va.va_size - tva.va_size :
1705                                             tva.va_size - va.va_size;
1706                                         if (nbl_conflict(tvp, NBL_WRITE,
1707                                             offset, len, 0, NULL)) {
1708                                                 error = EACCES;
1709                                                 goto out;
1710                                         }
1711                                 } else if (tvp) {
1712                                         VN_RELE(tvp);
1713                                         tvp = NULL;
1714                                 }
1715                         }
1716                 }
1717                 if (va.va_mask & AT_SIZE)
1718                         reqsize = va.va_size;
1719         }
1720 
1721         /*
1722          * Must specify the mode.
1723          */
1724         if (!(va.va_mask & AT_MODE)) {
1725                 resp->status = NFS3ERR_INVAL;
1726                 goto out1;
1727         }
1728 
1729         /*
1730          * If the filesystem is exported with nosuid, then mask off
1731          * the setuid and setgid bits.
1732          */
1733         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1734                 va.va_mode &= ~(VSUID | VSGID);
1735 
1736 tryagain:
1737         /*
1738          * The file open mode used is VWRITE.  If the client needs
1739          * some other semantic, then it should do the access checking
1740          * itself.  It would have been nice to have the file open mode
1741          * passed as part of the arguments.
1742          */
1743         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1744             &vp, cr, 0, NULL, NULL);
1745 
1746         dava.va_mask = AT_ALL;
1747         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1748 
1749         if (error) {
1750                 /*
1751                  * If we got something other than file already exists
1752                  * then just return this error.  Otherwise, we got
1753                  * EEXIST.  If we were doing a GUARDED create, then
1754                  * just return this error.  Otherwise, we need to
1755                  * make sure that this wasn't a duplicate of an
1756                  * exclusive create request.
1757                  *
1758                  * The assumption is made that a non-exclusive create
1759                  * request will never return EEXIST.
1760                  */
1761                 if (error != EEXIST || args->how.mode == GUARDED)
1762                         goto out;
1763                 /*
1764                  * Lookup the file so that we can get a vnode for it.
1765                  */
1766                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1767                     NULL, cr, NULL, NULL, NULL);
1768                 if (error) {
1769                         /*
1770                          * We couldn't find the file that we thought that
1771                          * we just created.  So, we'll just try creating
1772                          * it again.
1773                          */
1774                         if (error == ENOENT)
1775                                 goto tryagain;
1776                         goto out;
1777                 }
1778 
1779                 /*
1780                  * If the file is delegated to a v4 client, go ahead
1781                  * and initiate recall, this create is a hint that a
1782                  * conflicting v3 open has occurred.
1783                  */
1784 
1785                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1786                         VN_RELE(vp);
1787                         resp->status = NFS3ERR_JUKEBOX;
1788                         goto out1;
1789                 }
1790 
1791                 va.va_mask = AT_ALL;
1792                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1793 
1794                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1795                 /* % with INT32_MAX to prevent overflows */
1796                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1797                     vap->va_mtime.tv_sec !=
1798                     (mtime->seconds % INT32_MAX) ||
1799                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1800                         VN_RELE(vp);
1801                         error = EEXIST;
1802                         goto out;
1803                 }
1804         } else {
1805 
1806                 if ((args->how.mode == UNCHECKED ||
1807                     args->how.mode == GUARDED) &&
1808                     args->how.createhow3_u.obj_attributes.size.set_it &&
1809                     va.va_size == 0)
1810                         trunc = TRUE;
1811                 else
1812                         trunc = FALSE;
1813 
1814                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1815                         VN_RELE(vp);
1816                         resp->status = NFS3ERR_JUKEBOX;
1817                         goto out1;
1818                 }
1819 
1820                 va.va_mask = AT_ALL;
1821                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1822 
1823                 /*
1824                  * We need to check to make sure that the file got
1825                  * created to the indicated size.  If not, we do a
1826                  * setattr to try to change the size, but we don't
1827                  * try too hard.  This shouldn't be a problem as most
1828                  * clients will only specify a size of zero which
1829                  * local file systems handle.  However, even if
1830                  * the client does specify a non-zero size, it can
1831                  * still recover by checking the size of the file
1832                  * after it has created it and then issue a setattr
1833                  * request of its own to set the size of the file.
1834                  */
1835                 if (vap != NULL &&
1836                     (args->how.mode == UNCHECKED ||
1837                     args->how.mode == GUARDED) &&
1838                     args->how.createhow3_u.obj_attributes.size.set_it &&
1839                     vap->va_size != reqsize) {
1840                         va.va_mask = AT_SIZE;
1841                         va.va_size = reqsize;
1842                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1843                         va.va_mask = AT_ALL;
1844                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1845                 }
1846         }
1847 
1848         if (name != args->where.name)
1849                 kmem_free(name, MAXPATHLEN + 1);
1850 
1851         error = makefh3(&resp->resok.obj.handle, vp, exi);
1852         if (error)
1853                 resp->resok.obj.handle_follows = FALSE;
1854         else
1855                 resp->resok.obj.handle_follows = TRUE;
1856 
1857         /*
1858          * Force modified data and metadata out to stable storage.
1859          */
1860         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1861         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1862 
1863         VN_RELE(vp);
1864         if (tvp != NULL) {
1865                 if (in_crit)
1866                         nbl_end_crit(tvp);
1867                 VN_RELE(tvp);
1868         }
1869 
1870         resp->status = NFS3_OK;
1871         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1872         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1873 
1874         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1875             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1876             CREATE3res *, resp);
1877 
1878         VN_RELE(dvp);
1879         return;
1880 
1881 out:
1882         if (curthread->t_flag & T_WOULDBLOCK) {
1883                 curthread->t_flag &= ~T_WOULDBLOCK;
1884                 resp->status = NFS3ERR_JUKEBOX;
1885         } else
1886                 resp->status = puterrno3(error);
1887 out1:
1888         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1889             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1890             CREATE3res *, resp);
1891 
1892         if (name != NULL && name != args->where.name)
1893                 kmem_free(name, MAXPATHLEN + 1);
1894 
1895         if (tvp != NULL) {
1896                 if (in_crit)
1897                         nbl_end_crit(tvp);
1898                 VN_RELE(tvp);
1899         }
1900         if (dvp != NULL)
1901                 VN_RELE(dvp);
1902         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1903 }
1904 
1905 void *
1906 rfs3_create_getfh(CREATE3args *args)
1907 {
1908         return (&args->where.dir);
1909 }
1910 
1911 void
1912 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1913     struct svc_req *req, cred_t *cr, bool_t ro)
1914 {
1915         int error;
1916         vnode_t *vp = NULL;
1917         vnode_t *dvp;
1918         struct vattr *vap;
1919         struct vattr va;
1920         struct vattr *dbvap;
1921         struct vattr dbva;
1922         struct vattr *davap;
1923         struct vattr dava;
1924         struct sockaddr *ca;
1925         char *name = NULL;
1926 
1927         dbvap = NULL;
1928         davap = NULL;
1929 
1930         dvp = nfs3_fhtovp(&args->where.dir, exi);
1931 
1932         DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1933             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1934             MKDIR3args *, args);
1935 
1936         if (dvp == NULL) {
1937                 error = ESTALE;
1938                 goto out;
1939         }
1940 
1941         dbva.va_mask = AT_ALL;
1942         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1943         davap = dbvap;
1944 
1945         if (args->where.name == nfs3nametoolong) {
1946                 resp->status = NFS3ERR_NAMETOOLONG;
1947                 goto out1;
1948         }
1949 
1950         if (args->where.name == NULL || *(args->where.name) == '\0') {
1951                 resp->status = NFS3ERR_ACCES;
1952                 goto out1;
1953         }
1954 
1955         if (rdonly(ro, dvp)) {
1956                 resp->status = NFS3ERR_ROFS;
1957                 goto out1;
1958         }
1959 
1960         if (protect_zfs_mntpt(dvp) != 0) {
1961                 resp->status = NFS3ERR_ACCES;
1962                 goto out1;
1963         }
1964 
1965         if (is_system_labeled()) {
1966                 bslabel_t *clabel = req->rq_label;
1967 
1968                 ASSERT(clabel != NULL);
1969                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1970                     "got client label from request(1)", struct svc_req *, req);
1971 
1972                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1973                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1974                             exi)) {
1975                                 resp->status = NFS3ERR_ACCES;
1976                                 goto out1;
1977                         }
1978                 }
1979         }
1980 
1981         error = sattr3_to_vattr(&args->attributes, &va);
1982         if (error)
1983                 goto out;
1984 
1985         if (!(va.va_mask & AT_MODE)) {
1986                 resp->status = NFS3ERR_INVAL;
1987                 goto out1;
1988         }
1989 
1990         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1991         name = nfscmd_convname(ca, exi, args->where.name,
1992             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1993 
1994         if (name == NULL) {
1995                 resp->status = NFS3ERR_INVAL;
1996                 goto out1;
1997         }
1998 
1999         va.va_mask |= AT_TYPE;
2000         va.va_type = VDIR;
2001 
2002         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2003 
2004         if (name != args->where.name)
2005                 kmem_free(name, MAXPATHLEN + 1);
2006 
2007         dava.va_mask = AT_ALL;
2008         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2009 
2010         /*
2011          * Force modified data and metadata out to stable storage.
2012          */
2013         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2014 
2015         if (error)
2016                 goto out;
2017 
2018         error = makefh3(&resp->resok.obj.handle, vp, exi);
2019         if (error)
2020                 resp->resok.obj.handle_follows = FALSE;
2021         else
2022                 resp->resok.obj.handle_follows = TRUE;
2023 
2024         va.va_mask = AT_ALL;
2025         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2026 
2027         /*
2028          * Force modified data and metadata out to stable storage.
2029          */
2030         (void) VOP_FSYNC(vp, 0, cr, NULL);
2031 
2032         VN_RELE(vp);
2033 
2034         resp->status = NFS3_OK;
2035         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2036         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2037 
2038         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2039             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2040             MKDIR3res *, resp);
2041         VN_RELE(dvp);
2042 
2043         return;
2044 
2045 out:
2046         if (curthread->t_flag & T_WOULDBLOCK) {
2047                 curthread->t_flag &= ~T_WOULDBLOCK;
2048                 resp->status = NFS3ERR_JUKEBOX;
2049         } else
2050                 resp->status = puterrno3(error);
2051 out1:
2052         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2053             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2054             MKDIR3res *, resp);
2055         if (dvp != NULL)
2056                 VN_RELE(dvp);
2057         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2058 }
2059 
2060 void *
2061 rfs3_mkdir_getfh(MKDIR3args *args)
2062 {
2063         return (&args->where.dir);
2064 }
2065 
2066 void
2067 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2068     struct svc_req *req, cred_t *cr, bool_t ro)
2069 {
2070         int error;
2071         vnode_t *vp;
2072         vnode_t *dvp;
2073         struct vattr *vap;
2074         struct vattr va;
2075         struct vattr *dbvap;
2076         struct vattr dbva;
2077         struct vattr *davap;
2078         struct vattr dava;
2079         struct sockaddr *ca;
2080         char *name = NULL;
2081         char *symdata = NULL;
2082 
2083         dbvap = NULL;
2084         davap = NULL;
2085 
2086         dvp = nfs3_fhtovp(&args->where.dir, exi);
2087 
2088         DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2089             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2090             SYMLINK3args *, args);
2091 
2092         if (dvp == NULL) {
2093                 error = ESTALE;
2094                 goto err;
2095         }
2096 
2097         dbva.va_mask = AT_ALL;
2098         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2099         davap = dbvap;
2100 
2101         if (args->where.name == nfs3nametoolong) {
2102                 resp->status = NFS3ERR_NAMETOOLONG;
2103                 goto err1;
2104         }
2105 
2106         if (args->where.name == NULL || *(args->where.name) == '\0') {
2107                 resp->status = NFS3ERR_ACCES;
2108                 goto err1;
2109         }
2110 
2111         if (rdonly(ro, dvp)) {
2112                 resp->status = NFS3ERR_ROFS;
2113                 goto err1;
2114         }
2115 
2116         if (protect_zfs_mntpt(dvp) != 0) {
2117                 resp->status = NFS3ERR_ACCES;
2118                 goto err1;
2119         }
2120 
2121         if (is_system_labeled()) {
2122                 bslabel_t *clabel = req->rq_label;
2123 
2124                 ASSERT(clabel != NULL);
2125                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2126                     "got client label from request(1)", struct svc_req *, req);
2127 
2128                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2129                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2130                             exi)) {
2131                                 resp->status = NFS3ERR_ACCES;
2132                                 goto err1;
2133                         }
2134                 }
2135         }
2136 
2137         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2138         if (error)
2139                 goto err;
2140 
2141         if (!(va.va_mask & AT_MODE)) {
2142                 resp->status = NFS3ERR_INVAL;
2143                 goto err1;
2144         }
2145 
2146         if (args->symlink.symlink_data == nfs3nametoolong) {
2147                 resp->status = NFS3ERR_NAMETOOLONG;
2148                 goto err1;
2149         }
2150 
2151         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2152         name = nfscmd_convname(ca, exi, args->where.name,
2153             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2154 
2155         if (name == NULL) {
2156                 /* This is really a Solaris EILSEQ */
2157                 resp->status = NFS3ERR_INVAL;
2158                 goto err1;
2159         }
2160 
2161         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2162             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2163         if (symdata == NULL) {
2164                 /* This is really a Solaris EILSEQ */
2165                 resp->status = NFS3ERR_INVAL;
2166                 goto err1;
2167         }
2168 
2169 
2170         va.va_mask |= AT_TYPE;
2171         va.va_type = VLNK;
2172 
2173         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2174 
2175         dava.va_mask = AT_ALL;
2176         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2177 
2178         if (error)
2179                 goto err;
2180 
2181         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2182             NULL, NULL, NULL);
2183 
2184         /*
2185          * Force modified data and metadata out to stable storage.
2186          */
2187         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2188 
2189 
2190         resp->status = NFS3_OK;
2191         if (error) {
2192                 resp->resok.obj.handle_follows = FALSE;
2193                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2194                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2195                 goto out;
2196         }
2197 
2198         error = makefh3(&resp->resok.obj.handle, vp, exi);
2199         if (error)
2200                 resp->resok.obj.handle_follows = FALSE;
2201         else
2202                 resp->resok.obj.handle_follows = TRUE;
2203 
2204         va.va_mask = AT_ALL;
2205         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2206 
2207         /*
2208          * Force modified data and metadata out to stable storage.
2209          */
2210         (void) VOP_FSYNC(vp, 0, cr, NULL);
2211 
2212         VN_RELE(vp);
2213 
2214         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2215         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2216         goto out;
2217 
2218 err:
2219         if (curthread->t_flag & T_WOULDBLOCK) {
2220                 curthread->t_flag &= ~T_WOULDBLOCK;
2221                 resp->status = NFS3ERR_JUKEBOX;
2222         } else
2223                 resp->status = puterrno3(error);
2224 err1:
2225         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2226 out:
2227         if (name != NULL && name != args->where.name)
2228                 kmem_free(name, MAXPATHLEN + 1);
2229         if (symdata != NULL && symdata != args->symlink.symlink_data)
2230                 kmem_free(symdata, MAXPATHLEN + 1);
2231 
2232         DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2233             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2234             SYMLINK3res *, resp);
2235 
2236         if (dvp != NULL)
2237                 VN_RELE(dvp);
2238 }
2239 
2240 void *
2241 rfs3_symlink_getfh(SYMLINK3args *args)
2242 {
2243         return (&args->where.dir);
2244 }
2245 
2246 void
2247 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2248     struct svc_req *req, cred_t *cr, bool_t ro)
2249 {
2250         int error;
2251         vnode_t *vp;
2252         vnode_t *realvp;
2253         vnode_t *dvp;
2254         struct vattr *vap;
2255         struct vattr va;
2256         struct vattr *dbvap;
2257         struct vattr dbva;
2258         struct vattr *davap;
2259         struct vattr dava;
2260         int mode;
2261         enum vcexcl excl;
2262         struct sockaddr *ca;
2263         char *name = NULL;
2264 
2265         dbvap = NULL;
2266         davap = NULL;
2267 
2268         dvp = nfs3_fhtovp(&args->where.dir, exi);
2269 
2270         DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2271             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2272             MKNOD3args *, args);
2273 
2274         if (dvp == NULL) {
2275                 error = ESTALE;
2276                 goto out;
2277         }
2278 
2279         dbva.va_mask = AT_ALL;
2280         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2281         davap = dbvap;
2282 
2283         if (args->where.name == nfs3nametoolong) {
2284                 resp->status = NFS3ERR_NAMETOOLONG;
2285                 goto out1;
2286         }
2287 
2288         if (args->where.name == NULL || *(args->where.name) == '\0') {
2289                 resp->status = NFS3ERR_ACCES;
2290                 goto out1;
2291         }
2292 
2293         if (rdonly(ro, dvp)) {
2294                 resp->status = NFS3ERR_ROFS;
2295                 goto out1;
2296         }
2297 
2298         if (protect_zfs_mntpt(dvp) != 0) {
2299                 resp->status = NFS3ERR_ACCES;
2300                 goto out1;
2301         }
2302 
2303         if (is_system_labeled()) {
2304                 bslabel_t *clabel = req->rq_label;
2305 
2306                 ASSERT(clabel != NULL);
2307                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2308                     "got client label from request(1)", struct svc_req *, req);
2309 
2310                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2311                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2312                             exi)) {
2313                                 resp->status = NFS3ERR_ACCES;
2314                                 goto out1;
2315                         }
2316                 }
2317         }
2318 
2319         switch (args->what.type) {
2320         case NF3CHR:
2321         case NF3BLK:
2322                 error = sattr3_to_vattr(
2323                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2324                 if (error)
2325                         goto out;
2326                 if (secpolicy_sys_devices(cr) != 0) {
2327                         resp->status = NFS3ERR_PERM;
2328                         goto out1;
2329                 }
2330                 if (args->what.type == NF3CHR)
2331                         va.va_type = VCHR;
2332                 else
2333                         va.va_type = VBLK;
2334                 va.va_rdev = makedevice(
2335                     args->what.mknoddata3_u.device.spec.specdata1,
2336                     args->what.mknoddata3_u.device.spec.specdata2);
2337                 va.va_mask |= AT_TYPE | AT_RDEV;
2338                 break;
2339         case NF3SOCK:
2340                 error = sattr3_to_vattr(
2341                     &args->what.mknoddata3_u.pipe_attributes, &va);
2342                 if (error)
2343                         goto out;
2344                 va.va_type = VSOCK;
2345                 va.va_mask |= AT_TYPE;
2346                 break;
2347         case NF3FIFO:
2348                 error = sattr3_to_vattr(
2349                     &args->what.mknoddata3_u.pipe_attributes, &va);
2350                 if (error)
2351                         goto out;
2352                 va.va_type = VFIFO;
2353                 va.va_mask |= AT_TYPE;
2354                 break;
2355         default:
2356                 resp->status = NFS3ERR_BADTYPE;
2357                 goto out1;
2358         }
2359 
2360         /*
2361          * Must specify the mode.
2362          */
2363         if (!(va.va_mask & AT_MODE)) {
2364                 resp->status = NFS3ERR_INVAL;
2365                 goto out1;
2366         }
2367 
2368         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2369         name = nfscmd_convname(ca, exi, args->where.name,
2370             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2371 
2372         if (name == NULL) {
2373                 resp->status = NFS3ERR_INVAL;
2374                 goto out1;
2375         }
2376 
2377         excl = EXCL;
2378 
2379         mode = 0;
2380 
2381         error = VOP_CREATE(dvp, name, &va, excl, mode,
2382             &vp, cr, 0, NULL, NULL);
2383 
2384         if (name != args->where.name)
2385                 kmem_free(name, MAXPATHLEN + 1);
2386 
2387         dava.va_mask = AT_ALL;
2388         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2389 
2390         /*
2391          * Force modified data and metadata out to stable storage.
2392          */
2393         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2394 
2395         if (error)
2396                 goto out;
2397 
2398         resp->status = NFS3_OK;
2399 
2400         error = makefh3(&resp->resok.obj.handle, vp, exi);
2401         if (error)
2402                 resp->resok.obj.handle_follows = FALSE;
2403         else
2404                 resp->resok.obj.handle_follows = TRUE;
2405 
2406         va.va_mask = AT_ALL;
2407         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2408 
2409         /*
2410          * Force modified metadata out to stable storage.
2411          *
2412          * if a underlying vp exists, pass it to VOP_FSYNC
2413          */
2414         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2415                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2416         else
2417                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2418 
2419         VN_RELE(vp);
2420 
2421         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2422         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2423         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2424             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2425             MKNOD3res *, resp);
2426         VN_RELE(dvp);
2427         return;
2428 
2429 out:
2430         if (curthread->t_flag & T_WOULDBLOCK) {
2431                 curthread->t_flag &= ~T_WOULDBLOCK;
2432                 resp->status = NFS3ERR_JUKEBOX;
2433         } else
2434                 resp->status = puterrno3(error);
2435 out1:
2436         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2437             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2438             MKNOD3res *, resp);
2439         if (dvp != NULL)
2440                 VN_RELE(dvp);
2441         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2442 }
2443 
2444 void *
2445 rfs3_mknod_getfh(MKNOD3args *args)
2446 {
2447         return (&args->where.dir);
2448 }
2449 
2450 void
2451 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2452     struct svc_req *req, cred_t *cr, bool_t ro)
2453 {
2454         int error = 0;
2455         vnode_t *vp;
2456         struct vattr *bvap;
2457         struct vattr bva;
2458         struct vattr *avap;
2459         struct vattr ava;
2460         vnode_t *targvp = NULL;
2461         struct sockaddr *ca;
2462         char *name = NULL;
2463 
2464         bvap = NULL;
2465         avap = NULL;
2466 
2467         vp = nfs3_fhtovp(&args->object.dir, exi);
2468 
2469         DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2470             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2471             REMOVE3args *, args);
2472 
2473         if (vp == NULL) {
2474                 error = ESTALE;
2475                 goto err;
2476         }
2477 
2478         bva.va_mask = AT_ALL;
2479         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2480         avap = bvap;
2481 
2482         if (vp->v_type != VDIR) {
2483                 resp->status = NFS3ERR_NOTDIR;
2484                 goto err1;
2485         }
2486 
2487         if (args->object.name == nfs3nametoolong) {
2488                 resp->status = NFS3ERR_NAMETOOLONG;
2489                 goto err1;
2490         }
2491 
2492         if (args->object.name == NULL || *(args->object.name) == '\0') {
2493                 resp->status = NFS3ERR_ACCES;
2494                 goto err1;
2495         }
2496 
2497         if (rdonly(ro, vp)) {
2498                 resp->status = NFS3ERR_ROFS;
2499                 goto err1;
2500         }
2501 
2502         if (is_system_labeled()) {
2503                 bslabel_t *clabel = req->rq_label;
2504 
2505                 ASSERT(clabel != NULL);
2506                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2507                     "got client label from request(1)", struct svc_req *, req);
2508 
2509                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2510                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2511                             exi)) {
2512                                 resp->status = NFS3ERR_ACCES;
2513                                 goto err1;
2514                         }
2515                 }
2516         }
2517 
2518         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2519         name = nfscmd_convname(ca, exi, args->object.name,
2520             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2521 
2522         if (name == NULL) {
2523                 resp->status = NFS3ERR_INVAL;
2524                 goto err1;
2525         }
2526 
2527         /*
2528          * Check for a conflict with a non-blocking mandatory share
2529          * reservation and V4 delegations
2530          */
2531         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2532             NULL, cr, NULL, NULL, NULL);
2533         if (error != 0)
2534                 goto err;
2535 
2536         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2537                 resp->status = NFS3ERR_JUKEBOX;
2538                 goto err1;
2539         }
2540 
2541         if (!nbl_need_check(targvp)) {
2542                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2543         } else {
2544                 nbl_start_crit(targvp, RW_READER);
2545                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2546                         error = EACCES;
2547                 } else {
2548                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2549                 }
2550                 nbl_end_crit(targvp);
2551         }
2552         VN_RELE(targvp);
2553         targvp = NULL;
2554 
2555         ava.va_mask = AT_ALL;
2556         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2557 
2558         /*
2559          * Force modified data and metadata out to stable storage.
2560          */
2561         (void) VOP_FSYNC(vp, 0, cr, NULL);
2562 
2563         if (error)
2564                 goto err;
2565 
2566         resp->status = NFS3_OK;
2567         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2568         goto out;
2569 
2570 err:
2571         if (curthread->t_flag & T_WOULDBLOCK) {
2572                 curthread->t_flag &= ~T_WOULDBLOCK;
2573                 resp->status = NFS3ERR_JUKEBOX;
2574         } else
2575                 resp->status = puterrno3(error);
2576 err1:
2577         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2578 out:
2579         DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2580             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2581             REMOVE3res *, resp);
2582 
2583         if (name != NULL && name != args->object.name)
2584                 kmem_free(name, MAXPATHLEN + 1);
2585 
2586         if (vp != NULL)
2587                 VN_RELE(vp);
2588 }
2589 
2590 void *
2591 rfs3_remove_getfh(REMOVE3args *args)
2592 {
2593         return (&args->object.dir);
2594 }
2595 
2596 void
2597 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2598     struct svc_req *req, cred_t *cr, bool_t ro)
2599 {
2600         int error;
2601         vnode_t *vp;
2602         struct vattr *bvap;
2603         struct vattr bva;
2604         struct vattr *avap;
2605         struct vattr ava;
2606         struct sockaddr *ca;
2607         char *name = NULL;
2608 
2609         bvap = NULL;
2610         avap = NULL;
2611 
2612         vp = nfs3_fhtovp(&args->object.dir, exi);
2613 
2614         DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2615             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2616             RMDIR3args *, args);
2617 
2618         if (vp == NULL) {
2619                 error = ESTALE;
2620                 goto err;
2621         }
2622 
2623         bva.va_mask = AT_ALL;
2624         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2625         avap = bvap;
2626 
2627         if (vp->v_type != VDIR) {
2628                 resp->status = NFS3ERR_NOTDIR;
2629                 goto err1;
2630         }
2631 
2632         if (args->object.name == nfs3nametoolong) {
2633                 resp->status = NFS3ERR_NAMETOOLONG;
2634                 goto err1;
2635         }
2636 
2637         if (args->object.name == NULL || *(args->object.name) == '\0') {
2638                 resp->status = NFS3ERR_ACCES;
2639                 goto err1;
2640         }
2641 
2642         if (rdonly(ro, vp)) {
2643                 resp->status = NFS3ERR_ROFS;
2644                 goto err1;
2645         }
2646 
2647         if (is_system_labeled()) {
2648                 bslabel_t *clabel = req->rq_label;
2649 
2650                 ASSERT(clabel != NULL);
2651                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2652                     "got client label from request(1)", struct svc_req *, req);
2653 
2654                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2655                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2656                             exi)) {
2657                                 resp->status = NFS3ERR_ACCES;
2658                                 goto err1;
2659                         }
2660                 }
2661         }
2662 
2663         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2664         name = nfscmd_convname(ca, exi, args->object.name,
2665             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2666 
2667         if (name == NULL) {
2668                 resp->status = NFS3ERR_INVAL;
2669                 goto err1;
2670         }
2671 
2672         error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2673 
2674         if (name != args->object.name)
2675                 kmem_free(name, MAXPATHLEN + 1);
2676 
2677         ava.va_mask = AT_ALL;
2678         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2679 
2680         /*
2681          * Force modified data and metadata out to stable storage.
2682          */
2683         (void) VOP_FSYNC(vp, 0, cr, NULL);
2684 
2685         if (error) {
2686                 /*
2687                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2688                  * if the directory is not empty.  A System V NFS server
2689                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2690                  * over the wire.
2691                  */
2692                 if (error == EEXIST)
2693                         error = ENOTEMPTY;
2694                 goto err;
2695         }
2696 
2697         resp->status = NFS3_OK;
2698         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2699         goto out;
2700 
2701 err:
2702         if (curthread->t_flag & T_WOULDBLOCK) {
2703                 curthread->t_flag &= ~T_WOULDBLOCK;
2704                 resp->status = NFS3ERR_JUKEBOX;
2705         } else
2706                 resp->status = puterrno3(error);
2707 err1:
2708         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2709 out:
2710         DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2711             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2712             RMDIR3res *, resp);
2713         if (vp != NULL)
2714                 VN_RELE(vp);
2715 
2716 }
2717 
2718 void *
2719 rfs3_rmdir_getfh(RMDIR3args *args)
2720 {
2721         return (&args->object.dir);
2722 }
2723 
2724 void
2725 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2726     struct svc_req *req, cred_t *cr, bool_t ro)
2727 {
2728         int error = 0;
2729         vnode_t *fvp;
2730         vnode_t *tvp;
2731         vnode_t *targvp;
2732         struct vattr *fbvap;
2733         struct vattr fbva;
2734         struct vattr *favap;
2735         struct vattr fava;
2736         struct vattr *tbvap;
2737         struct vattr tbva;
2738         struct vattr *tavap;
2739         struct vattr tava;
2740         nfs_fh3 *fh3;
2741         struct exportinfo *to_exi;
2742         vnode_t *srcvp = NULL;
2743         bslabel_t *clabel;
2744         struct sockaddr *ca;
2745         char *name = NULL;
2746         char *toname = NULL;
2747 
2748         fbvap = NULL;
2749         favap = NULL;
2750         tbvap = NULL;
2751         tavap = NULL;
2752         tvp = NULL;
2753 
2754         fvp = nfs3_fhtovp(&args->from.dir, exi);
2755 
2756         DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2757             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2758             RENAME3args *, args);
2759 
2760         if (fvp == NULL) {
2761                 error = ESTALE;
2762                 goto err;
2763         }
2764 
2765         if (is_system_labeled()) {
2766                 clabel = req->rq_label;
2767                 ASSERT(clabel != NULL);
2768                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2769                     "got client label from request(1)", struct svc_req *, req);
2770 
2771                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2772                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2773                             exi)) {
2774                                 resp->status = NFS3ERR_ACCES;
2775                                 goto err1;
2776                         }
2777                 }
2778         }
2779 
2780         fbva.va_mask = AT_ALL;
2781         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2782         favap = fbvap;
2783 
2784         fh3 = &args->to.dir;
2785         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2786         if (to_exi == NULL) {
2787                 resp->status = NFS3ERR_ACCES;
2788                 goto err1;
2789         }
2790         exi_rele(&to_exi);
2791 
2792         if (to_exi != exi) {
2793                 resp->status = NFS3ERR_XDEV;
2794                 goto err1;
2795         }
2796 
2797         tvp = nfs3_fhtovp(&args->to.dir, exi);
2798         if (tvp == NULL) {
2799                 error = ESTALE;
2800                 goto err;
2801         }
2802 
2803         tbva.va_mask = AT_ALL;
2804         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2805         tavap = tbvap;
2806 
2807         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2808                 resp->status = NFS3ERR_NOTDIR;
2809                 goto err1;
2810         }
2811 
2812         if (args->from.name == nfs3nametoolong ||
2813             args->to.name == nfs3nametoolong) {
2814                 resp->status = NFS3ERR_NAMETOOLONG;
2815                 goto err1;
2816         }
2817         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2818             args->to.name == NULL || *(args->to.name) == '\0') {
2819                 resp->status = NFS3ERR_ACCES;
2820                 goto err1;
2821         }
2822 
2823         if (rdonly(ro, tvp)) {
2824                 resp->status = NFS3ERR_ROFS;
2825                 goto err1;
2826         }
2827 
2828         if (protect_zfs_mntpt(tvp) != 0) {
2829                 resp->status = NFS3ERR_ACCES;
2830                 goto err1;
2831         }
2832 
2833         if (is_system_labeled()) {
2834                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2835                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2836                             exi)) {
2837                                 resp->status = NFS3ERR_ACCES;
2838                                 goto err1;
2839                         }
2840                 }
2841         }
2842 
2843         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2844         name = nfscmd_convname(ca, exi, args->from.name,
2845             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2846 
2847         if (name == NULL) {
2848                 resp->status = NFS3ERR_INVAL;
2849                 goto err1;
2850         }
2851 
2852         toname = nfscmd_convname(ca, exi, args->to.name,
2853             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2854 
2855         if (toname == NULL) {
2856                 resp->status = NFS3ERR_INVAL;
2857                 goto err1;
2858         }
2859 
2860         /*
2861          * Check for a conflict with a non-blocking mandatory share
2862          * reservation or V4 delegations.
2863          */
2864         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2865             NULL, cr, NULL, NULL, NULL);
2866         if (error != 0)
2867                 goto err;
2868 
2869         /*
2870          * If we rename a delegated file we should recall the
2871          * delegation, since future opens should fail or would
2872          * refer to a new file.
2873          */
2874         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2875                 resp->status = NFS3ERR_JUKEBOX;
2876                 goto err1;
2877         }
2878 
2879         /*
2880          * Check for renaming over a delegated file.  Check nfs4_deleg_policy
2881          * first to avoid VOP_LOOKUP if possible.
2882          */
2883         if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2884             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2885             NULL, NULL, NULL) == 0) {
2886 
2887                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2888                         VN_RELE(targvp);
2889                         resp->status = NFS3ERR_JUKEBOX;
2890                         goto err1;
2891                 }
2892                 VN_RELE(targvp);
2893         }
2894 
2895         if (!nbl_need_check(srcvp)) {
2896                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2897         } else {
2898                 nbl_start_crit(srcvp, RW_READER);
2899                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2900                         error = EACCES;
2901                 else
2902                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2903                 nbl_end_crit(srcvp);
2904         }
2905         if (error == 0)
2906                 vn_renamepath(tvp, srcvp, args->to.name,
2907                     strlen(args->to.name));
2908         VN_RELE(srcvp);
2909         srcvp = NULL;
2910 
2911         fava.va_mask = AT_ALL;
2912         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2913         tava.va_mask = AT_ALL;
2914         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2915 
2916         /*
2917          * Force modified data and metadata out to stable storage.
2918          */
2919         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2920         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2921 
2922         if (error)
2923                 goto err;
2924 
2925         resp->status = NFS3_OK;
2926         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2927         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2928         goto out;
2929 
2930 err:
2931         if (curthread->t_flag & T_WOULDBLOCK) {
2932                 curthread->t_flag &= ~T_WOULDBLOCK;
2933                 resp->status = NFS3ERR_JUKEBOX;
2934         } else {
2935                 resp->status = puterrno3(error);
2936         }
2937 err1:
2938         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2939         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2940 
2941 out:
2942         if (name != NULL && name != args->from.name)
2943                 kmem_free(name, MAXPATHLEN + 1);
2944         if (toname != NULL && toname != args->to.name)
2945                 kmem_free(toname, MAXPATHLEN + 1);
2946 
2947         DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2948             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2949             RENAME3res *, resp);
2950         if (fvp != NULL)
2951                 VN_RELE(fvp);
2952         if (tvp != NULL)
2953                 VN_RELE(tvp);
2954 }
2955 
2956 void *
2957 rfs3_rename_getfh(RENAME3args *args)
2958 {
2959         return (&args->from.dir);
2960 }
2961 
2962 void
2963 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2964     struct svc_req *req, cred_t *cr, bool_t ro)
2965 {
2966         int error;
2967         vnode_t *vp;
2968         vnode_t *dvp;
2969         struct vattr *vap;
2970         struct vattr va;
2971         struct vattr *bvap;
2972         struct vattr bva;
2973         struct vattr *avap;
2974         struct vattr ava;
2975         nfs_fh3 *fh3;
2976         struct exportinfo *to_exi;
2977         bslabel_t *clabel;
2978         struct sockaddr *ca;
2979         char *name = NULL;
2980 
2981         vap = NULL;
2982         bvap = NULL;
2983         avap = NULL;
2984         dvp = NULL;
2985 
2986         vp = nfs3_fhtovp(&args->file, exi);
2987 
2988         DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2989             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2990             LINK3args *, args);
2991 
2992         if (vp == NULL) {
2993                 error = ESTALE;
2994                 goto out;
2995         }
2996 
2997         va.va_mask = AT_ALL;
2998         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2999 
3000         fh3 = &args->link.dir;
3001         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3002         if (to_exi == NULL) {
3003                 resp->status = NFS3ERR_ACCES;
3004                 goto out1;
3005         }
3006         exi_rele(&to_exi);
3007 
3008         if (to_exi != exi) {
3009                 resp->status = NFS3ERR_XDEV;
3010                 goto out1;
3011         }
3012 
3013         if (is_system_labeled()) {
3014                 clabel = req->rq_label;
3015 
3016                 ASSERT(clabel != NULL);
3017                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3018                     "got client label from request(1)", struct svc_req *, req);
3019 
3020                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3021                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3022                             exi)) {
3023                                 resp->status = NFS3ERR_ACCES;
3024                                 goto out1;
3025                         }
3026                 }
3027         }
3028 
3029         dvp = nfs3_fhtovp(&args->link.dir, exi);
3030         if (dvp == NULL) {
3031                 error = ESTALE;
3032                 goto out;
3033         }
3034 
3035         bva.va_mask = AT_ALL;
3036         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3037 
3038         if (dvp->v_type != VDIR) {
3039                 resp->status = NFS3ERR_NOTDIR;
3040                 goto out1;
3041         }
3042 
3043         if (args->link.name == nfs3nametoolong) {
3044                 resp->status = NFS3ERR_NAMETOOLONG;
3045                 goto out1;
3046         }
3047 
3048         if (args->link.name == NULL || *(args->link.name) == '\0') {
3049                 resp->status = NFS3ERR_ACCES;
3050                 goto out1;
3051         }
3052 
3053         if (rdonly(ro, dvp)) {
3054                 resp->status = NFS3ERR_ROFS;
3055                 goto out1;
3056         }
3057 
3058         if (protect_zfs_mntpt(dvp) != 0) {
3059                 resp->status = NFS3ERR_ACCES;
3060                 goto out1;
3061         }
3062 
3063         if (is_system_labeled()) {
3064                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3065                     "got client label from request(1)", struct svc_req *, req);
3066 
3067                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3068                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3069                             exi)) {
3070                                 resp->status = NFS3ERR_ACCES;
3071                                 goto out1;
3072                         }
3073                 }
3074         }
3075 
3076         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3077         name = nfscmd_convname(ca, exi, args->link.name,
3078             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3079 
3080         if (name == NULL) {
3081                 resp->status = NFS3ERR_SERVERFAULT;
3082                 goto out1;
3083         }
3084 
3085         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3086 
3087         va.va_mask = AT_ALL;
3088         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3089         ava.va_mask = AT_ALL;
3090         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3091 
3092         /*
3093          * Force modified data and metadata out to stable storage.
3094          */
3095         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3096         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3097 
3098         if (error)
3099                 goto out;
3100 
3101         VN_RELE(dvp);
3102 
3103         resp->status = NFS3_OK;
3104         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3105         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3106 
3107         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3108             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3109             LINK3res *, resp);
3110 
3111         VN_RELE(vp);
3112 
3113         return;
3114 
3115 out:
3116         if (curthread->t_flag & T_WOULDBLOCK) {
3117                 curthread->t_flag &= ~T_WOULDBLOCK;
3118                 resp->status = NFS3ERR_JUKEBOX;
3119         } else
3120                 resp->status = puterrno3(error);
3121 out1:
3122         if (name != NULL && name != args->link.name)
3123                 kmem_free(name, MAXPATHLEN + 1);
3124 
3125         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3126             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3127             LINK3res *, resp);
3128 
3129         if (vp != NULL)
3130                 VN_RELE(vp);
3131         if (dvp != NULL)
3132                 VN_RELE(dvp);
3133         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3134         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3135 }
3136 
3137 void *
3138 rfs3_link_getfh(LINK3args *args)
3139 {
3140         return (&args->file);
3141 }
3142 
/*
 * Step from one dirent64 record to the record that follows it in the same
 * buffer, using the record length stored in d_reclen.  Any earlier
 * definition of nextdp is discarded first.
 */
#undef nextdp
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3147 
3148 /* ARGSUSED */
3149 void
3150 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3151     struct svc_req *req, cred_t *cr, bool_t ro)
3152 {
3153         int error;
3154         vnode_t *vp;
3155         struct vattr *vap;
3156         struct vattr va;
3157         struct iovec iov;
3158         struct uio uio;
3159         int iseof;
3160 
3161         count3 count = args->count;
3162         count3 size;            /* size of the READDIR3resok structure */
3163 
3164         size_t datasz;
3165         char *data = NULL;
3166         dirent64_t *dp;
3167 
3168         struct sockaddr *ca;
3169         entry3 **eptr;
3170         entry3 *entry;
3171 
3172         vp = nfs3_fhtovp(&args->dir, exi);
3173 
3174         DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3175             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3176             READDIR3args *, args);
3177 
3178         if (vp == NULL) {
3179                 resp->status = NFS3ERR_STALE;
3180                 vap = NULL;
3181                 goto out1;
3182         }
3183 
3184         if (vp->v_type != VDIR) {
3185                 resp->status = NFS3ERR_NOTDIR;
3186                 vap = NULL;
3187                 goto out1;
3188         }
3189 
3190         if (is_system_labeled()) {
3191                 bslabel_t *clabel = req->rq_label;
3192 
3193                 ASSERT(clabel != NULL);
3194                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3195                     "got client label from request(1)", struct svc_req *, req);
3196 
3197                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3198                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3199                             exi)) {
3200                                 resp->status = NFS3ERR_ACCES;
3201                                 vap = NULL;
3202                                 goto out1;
3203                         }
3204                 }
3205         }
3206 
3207         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3208 
3209         va.va_mask = AT_ALL;
3210         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3211 
3212         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3213         if (error)
3214                 goto out;
3215 
3216         /*
3217          * Don't allow arbitrary counts for allocation
3218          */
3219         if (count > rfs3_tsize(req))
3220                 count = rfs3_tsize(req);
3221 
3222         /*
3223          * struct READDIR3resok:
3224          *   dir_attributes:    1 + NFS3_SIZEOF_FATTR3
3225          *   cookieverf:        2
3226          *   entries (bool):    1
3227          *   eof:               1
3228          */
3229         size = (1 + NFS3_SIZEOF_FATTR3 + 2 + 1 + 1) * BYTES_PER_XDR_UNIT;
3230 
3231         if (size > count) {
3232                 resp->status = NFS3ERR_TOOSMALL;
3233                 goto out1;
3234         }
3235 
3236         /*
3237          * This is simplification.  The dirent64_t size is not the same as the
3238          * size of XDR representation of entry3, but the sizes are similar so
3239          * we'll assume they are same.  This assumption should not cause any
3240          * harm.  In worst case we will need to issue VOP_READDIR() once more.
3241          */
3242         datasz = count;
3243 
3244         /*
3245          * Make sure that there is room to read at least one entry
3246          * if any are available.
3247          */
3248         if (datasz < DIRENT64_RECLEN(MAXNAMELEN))
3249                 datasz = DIRENT64_RECLEN(MAXNAMELEN);
3250 
3251         data = kmem_alloc(datasz, KM_NOSLEEP);
3252         if (data == NULL) {
3253                 /* The allocation failed; downsize and wait for it this time */
3254                 if (datasz > MAXBSIZE)
3255                         datasz = MAXBSIZE;
3256                 data = kmem_alloc(datasz, KM_SLEEP);
3257         }
3258 
3259         uio.uio_iov = &iov;
3260         uio.uio_iovcnt = 1;
3261         uio.uio_segflg = UIO_SYSSPACE;
3262         uio.uio_extflg = UIO_COPY_CACHED;
3263         uio.uio_loffset = (offset_t)args->cookie;
3264         uio.uio_resid = datasz;
3265 
3266         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3267         eptr = &resp->resok.reply.entries;
3268         entry = NULL;
3269 
3270 getmoredents:
3271         iov.iov_base = data;
3272         iov.iov_len = datasz;
3273 
3274         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3275         if (error) {
3276                 iseof = 0;
3277                 goto done;
3278         }
3279 
3280         if (iov.iov_len == datasz)
3281                 goto done;
3282 
3283         for (dp = (dirent64_t *)data; (char *)dp - data < datasz - iov.iov_len;
3284             dp = nextdp(dp)) {
3285                 char *name;
3286                 count3 esize;
3287 
3288                 if (dp->d_ino == 0) {
3289                         if (entry != NULL)
3290                                 entry->cookie = (cookie3)dp->d_off;
3291                         continue;
3292                 }
3293 
3294                 name = nfscmd_convname(ca, exi, dp->d_name,
3295                     NFSCMD_CONV_OUTBOUND, MAXPATHLEN + 1);
3296                 if (name == NULL) {
3297                         if (entry != NULL)
3298                                 entry->cookie = (cookie3)dp->d_off;
3299                         continue;
3300                 }
3301 
3302                 /*
3303                  * struct entry3:
3304                  *   fileid:            2
3305                  *   name (length):     1
3306                  *   name (data):       length (rounded up)
3307                  *   cookie:            2
3308                  *   nextentry (bool):  1
3309                  */
3310                 esize = (2 + 1 + 2 + 1) * BYTES_PER_XDR_UNIT +
3311                     RNDUP(strlen(name));
3312 
3313                 /* If the new entry does not fit, discard it */
3314                 if (esize > count - size) {
3315                         if (name != dp->d_name)
3316                                 kmem_free(name, MAXPATHLEN + 1);
3317                         iseof = 0;
3318                         goto done;
3319                 }
3320 
3321                 entry = kmem_alloc(sizeof (entry3), KM_SLEEP);
3322 
3323                 entry->fileid = (fileid3)dp->d_ino;
3324                 entry->name = strdup(name);
3325                 if (name != dp->d_name)
3326                         kmem_free(name, MAXPATHLEN + 1);
3327                 entry->cookie = (cookie3)dp->d_off;
3328 
3329                 size += esize;
3330 
3331                 /* Add the entry to the linked list */
3332                 *eptr = entry;
3333                 eptr = &entry->nextentry;
3334         }
3335 
3336         if (!iseof && size < count) {
3337                 uio.uio_resid = MIN(datasz, MAXBSIZE);
3338                 goto getmoredents;
3339         }
3340 
3341 done:
3342         *eptr = NULL;
3343 
3344         va.va_mask = AT_ALL;
3345         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3346 
3347         if (!iseof && resp->resok.reply.entries == NULL) {
3348                 if (error)
3349                         goto out;
3350                 resp->status = NFS3ERR_TOOSMALL;
3351                 goto out1;
3352         }
3353 
3354         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3355 
3356 #if 0 /* notyet */
3357         /*
3358          * Don't do this.  It causes local disk writes when just
3359          * reading the file and the overhead is deemed larger
3360          * than the benefit.
3361          */
3362         /*
3363          * Force modified metadata out to stable storage.
3364          */
3365         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3366 #endif
3367 
3368         resp->status = NFS3_OK;
3369         resp->resok.cookieverf = 0;
3370         resp->resok.reply.eof = iseof ? TRUE : FALSE;
3371 
3372         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3373 
3374         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3375             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3376             READDIR3res *, resp);
3377 
3378         VN_RELE(vp);
3379 
3380         if (data != NULL)
3381                 kmem_free(data, datasz);
3382 
3383         return;
3384 
3385 out:
3386         if (curthread->t_flag & T_WOULDBLOCK) {
3387                 curthread->t_flag &= ~T_WOULDBLOCK;
3388                 resp->status = NFS3ERR_JUKEBOX;
3389         } else
3390                 resp->status = puterrno3(error);
3391 out1:
3392         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3393 
3394         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3395             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3396             READDIR3res *, resp);
3397 
3398         if (vp != NULL) {
3399                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3400                 VN_RELE(vp);
3401         }
3402 
3403         if (data != NULL)
3404                 kmem_free(data, datasz);
3405 }
3406 
3407 void *
3408 rfs3_readdir_getfh(READDIR3args *args)
3409 {
3410         return (&args->dir);
3411 }
3412 
3413 void
3414 rfs3_readdir_free(READDIR3res *resp)
3415 {
3416         if (resp->status == NFS3_OK) {
3417                 entry3 *entry, *nentry;
3418 
3419                 for (entry = resp->resok.reply.entries; entry != NULL;
3420                     entry = nentry) {
3421                         nentry = entry->nextentry;
3422                         strfree(entry->name);
3423                         kmem_free(entry, sizeof (entry3));
3424                 }
3425         }
3426 }
3427 
/*
 * Step from one dirent64 record to the record that follows it in the same
 * buffer, using the record length stored in d_reclen.  Any earlier
 * definition of nextdp is discarded first.
 */
#undef nextdp
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3432 
3433 /* ARGSUSED */
3434 void
3435 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3436     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3437 {
3438         int error;
3439         vnode_t *vp;
3440         struct vattr *vap;
3441         struct vattr va;
3442         struct iovec iov;
3443         struct uio uio;
3444         int iseof;
3445 
3446         count3 dircount = args->dircount;
3447         count3 maxcount = args->maxcount;
3448         count3 dirsize = 0;
3449         count3 size;            /* size of the READDIRPLUS3resok structure */
3450 
3451         size_t datasz;
3452         char *data = NULL;
3453         dirent64_t *dp;
3454 
3455         struct sockaddr *ca;
3456         entryplus3 **eptr;
3457         entryplus3 *entry;
3458 
3459         vp = nfs3_fhtovp(&args->dir, exi);
3460 
3461         DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3462             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3463             READDIRPLUS3args *, args);
3464 
3465         if (vp == NULL) {
3466                 resp->status = NFS3ERR_STALE;
3467                 vap = NULL;
3468                 goto out1;
3469         }
3470 
3471         if (vp->v_type != VDIR) {
3472                 resp->status = NFS3ERR_NOTDIR;
3473                 vap = NULL;
3474                 goto out1;
3475         }
3476 
3477         if (is_system_labeled()) {
3478                 bslabel_t *clabel = req->rq_label;
3479 
3480                 ASSERT(clabel != NULL);
3481                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3482                     char *, "got client label from request(1)",
3483                     struct svc_req *, req);
3484 
3485                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3486                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3487                             exi)) {
3488                                 resp->status = NFS3ERR_ACCES;
3489                                 vap = NULL;
3490                                 goto out1;
3491                         }
3492                 }
3493         }
3494 
3495         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3496 
3497         va.va_mask = AT_ALL;
3498         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3499 
3500         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3501         if (error)
3502                 goto out;
3503 
3504         /*
3505          * Don't allow arbitrary counts for allocation
3506          */
3507         if (maxcount > rfs3_tsize(req))
3508                 maxcount = rfs3_tsize(req);
3509 
3510         /*
3511          * struct READDIRPLUS3resok:
3512          *   dir_attributes:    1 + NFS3_SIZEOF_FATTR3
3513          *   cookieverf:        2
3514          *   entries (bool):    1
3515          *   eof:               1
3516          */
3517         size = (1 + NFS3_SIZEOF_FATTR3 + 2 + 1 + 1) * BYTES_PER_XDR_UNIT;
3518 
3519         if (size > maxcount) {
3520                 resp->status = NFS3ERR_TOOSMALL;
3521                 goto out1;
3522         }
3523 
3524         /*
3525          * This is simplification.  The dirent64_t size is not the same as the
3526          * size of XDR representation of entryplus3 (excluding attributes and
3527          * handle), but the sizes are similar so we'll assume they are same.
3528          * This assumption should not cause any harm.  In worst case we will
3529          * need to issue VOP_READDIR() once more.
3530          */
3531 
3532         datasz = MIN(dircount, maxcount);
3533 
3534         /*
3535          * Make sure that there is room to read at least one entry
3536          * if any are available.
3537          */
3538         if (datasz < DIRENT64_RECLEN(MAXNAMELEN))
3539                 datasz = DIRENT64_RECLEN(MAXNAMELEN);
3540 
3541         data = kmem_alloc(datasz, KM_NOSLEEP);
3542         if (data == NULL) {
3543                 /* The allocation failed; downsize and wait for it this time */
3544                 if (datasz > MAXBSIZE)
3545                         datasz = MAXBSIZE;
3546                 data = kmem_alloc(datasz, KM_SLEEP);
3547         }
3548 
3549         uio.uio_iov = &iov;
3550         uio.uio_iovcnt = 1;
3551         uio.uio_segflg = UIO_SYSSPACE;
3552         uio.uio_extflg = UIO_COPY_CACHED;
3553         uio.uio_loffset = (offset_t)args->cookie;
3554         uio.uio_resid = datasz;
3555 
3556         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3557         eptr = &resp->resok.reply.entries;
3558         entry = NULL;
3559 
3560 getmoredents:
3561         iov.iov_base = data;
3562         iov.iov_len = datasz;
3563 
3564         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3565         if (error) {
3566                 iseof = 0;
3567                 goto done;
3568         }
3569 
3570         if (iov.iov_len == datasz)
3571                 goto done;
3572 
3573         for (dp = (dirent64_t *)data; (char *)dp - data < datasz - iov.iov_len;
3574             dp = nextdp(dp)) {
3575                 char *name;
3576                 vnode_t *nvp;
3577                 count3 edirsize;
3578                 count3 esize;
3579 
3580                 if (dp->d_ino == 0) {
3581                         if (entry != NULL)
3582                                 entry->cookie = (cookie3)dp->d_off;
3583                         continue;
3584                 }
3585 
3586                 name = nfscmd_convname(ca, exi, dp->d_name,
3587                     NFSCMD_CONV_OUTBOUND, MAXPATHLEN + 1);
3588                 if (name == NULL) {
3589                         if (entry != NULL)
3590                                 entry->cookie = (cookie3)dp->d_off;
3591                         continue;
3592                 }
3593 
3594                 /*
3595                  * struct entryplus3:
3596                  *   fileid:            2
3597                  *   name (length):     1
3598                  *   name (data):       length (rounded up)
3599                  *   cookie:            2
3600                  */
3601                 edirsize = (2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3602                     RNDUP(strlen(name));
3603 
3604                 /*
3605                  * struct entryplus3:
3606                  *   attributes_follow: 1
3607                  *   handle_follows:    1
3608                  *   nextentry (bool):  1
3609                  */
3610                 esize = edirsize + (1 + 1 + 1) * BYTES_PER_XDR_UNIT;
3611 
3612                 /* If the new entry does not fit, we are done */
3613                 if (edirsize > dircount - dirsize || esize > maxcount - size) {
3614                         if (name != dp->d_name)
3615                                 kmem_free(name, MAXPATHLEN + 1);
3616                         iseof = 0;
3617                         error = 0;
3618                         goto done;
3619                 }
3620 
3621                 entry = kmem_alloc(sizeof (entryplus3), KM_SLEEP);
3622 
3623                 entry->fileid = (fileid3)dp->d_ino;
3624                 entry->name = strdup(name);
3625                 if (name != dp->d_name)
3626                         kmem_free(name, MAXPATHLEN + 1);
3627                 entry->cookie = (cookie3)dp->d_off;
3628 
3629                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3630                     NULL, NULL, NULL);
3631                 if (error) {
3632                         entry->name_attributes.attributes = FALSE;
3633                         entry->name_handle.handle_follows = FALSE;
3634                 } else {
3635                         struct vattr nva;
3636                         struct vattr *nvap;
3637 
3638                         nva.va_mask = AT_ALL;
3639                         nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL :
3640                             &nva;
3641 
3642                         /* Lie about the object type for a referral */
3643                         if (nvap != NULL && vn_is_nfs_reparse(nvp, cr))
3644                                 nvap->va_type = VLNK;
3645 
3646                         if (vn_ismntpt(nvp)) {
3647                                 entry->name_attributes.attributes = FALSE;
3648                                 entry->name_handle.handle_follows = FALSE;
3649                         } else {
3650                                 vattr_to_post_op_attr(nvap,
3651                                     &entry->name_attributes);
3652 
3653                                 error = makefh3(&entry->name_handle.handle, nvp,
3654                                     exi);
3655                                 if (!error)
3656                                         entry->name_handle.handle_follows =
3657                                             TRUE;
3658                                 else
3659                                         entry->name_handle.handle_follows =
3660                                             FALSE;
3661                         }
3662 
3663                         VN_RELE(nvp);
3664                 }
3665 
3666                 /*
3667                  * struct entryplus3 (optionally):
3668                  *   attributes:        NFS3_SIZEOF_FATTR3
3669                  *   handle length:     1
3670                  *   handle data:       length (rounded up)
3671                  */
3672                 if (entry->name_attributes.attributes == TRUE)
3673                         esize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3674                 if (entry->name_handle.handle_follows == TRUE)
3675                         esize += 1 * BYTES_PER_XDR_UNIT +
3676                             RNDUP(entry->name_handle.handle.fh3_length);
3677 
3678                 /* If the new entry does not fit, discard it */
3679                 if (esize > maxcount - size) {
3680                         strfree(entry->name);
3681                         kmem_free(entry, sizeof (entryplus3));
3682                         iseof = 0;
3683                         error = 0;
3684                         goto done;
3685                 }
3686 
3687                 dirsize += edirsize;
3688                 size += esize;
3689 
3690                 /* Add the entry to the linked list */
3691                 *eptr = entry;
3692                 eptr = &entry->nextentry;
3693         }
3694 
3695         if (!iseof && dirsize < dircount && size < maxcount) {
3696                 uio.uio_resid = MIN(datasz, MAXBSIZE);
3697                 goto getmoredents;
3698         }
3699 
3700 done:
3701         *eptr = NULL;
3702 
3703         va.va_mask = AT_ALL;
3704         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3705 
3706         if (!iseof && resp->resok.reply.entries == NULL) {
3707                 if (error)
3708                         goto out;
3709                 resp->status = NFS3ERR_TOOSMALL;
3710                 goto out1;
3711         }
3712 
3713         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3714 
3715 #if 0 /* notyet */
3716         /*
3717          * Don't do this.  It causes local disk writes when just
3718          * reading the file and the overhead is deemed larger
3719          * than the benefit.
3720          */
3721         /*
3722          * Force modified metadata out to stable storage.
3723          */
3724         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3725 #endif
3726 
3727         resp->status = NFS3_OK;
3728         resp->resok.cookieverf = 0;
3729         resp->resok.reply.eof = iseof ? TRUE : FALSE;
3730 
3731         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3732 
3733         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3734             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3735             READDIRPLUS3res *, resp);
3736 
3737         VN_RELE(vp);
3738 
3739         if (data != NULL)
3740                 kmem_free(data, datasz);
3741 
3742         return;
3743 
3744 out:
3745         if (curthread->t_flag & T_WOULDBLOCK) {
3746                 curthread->t_flag &= ~T_WOULDBLOCK;
3747                 resp->status = NFS3ERR_JUKEBOX;
3748         } else {
3749                 resp->status = puterrno3(error);
3750         }
3751 out1:
3752         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3753 
3754         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3755             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3756             READDIRPLUS3res *, resp);
3757 
3758         if (vp != NULL) {
3759                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3760                 VN_RELE(vp);
3761         }
3762 
3763         if (data != NULL)
3764                 kmem_free(data, datasz);
3765 }
3766 
3767 void *
3768 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3769 {
3770         return (&args->dir);
3771 }
3772 
3773 void
3774 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3775 {
3776         if (resp->status == NFS3_OK) {
3777                 entryplus3 *entry, *nentry;
3778 
3779                 for (entry = resp->resok.reply.entries; entry != NULL;
3780                     entry = nentry) {
3781                         nentry = entry->nextentry;
3782                         strfree(entry->name);
3783                         kmem_free(entry, sizeof (entryplus3));
3784                 }
3785         }
3786 }
3787 
3788 /* ARGSUSED */
3789 void
3790 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3791     struct svc_req *req, cred_t *cr, bool_t ro)
3792 {
3793         int error;
3794         vnode_t *vp;
3795         struct vattr *vap;
3796         struct vattr va;
3797         struct statvfs64 sb;
3798 
3799         vap = NULL;
3800 
3801         vp = nfs3_fhtovp(&args->fsroot, exi);
3802 
3803         DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3804             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3805             FSSTAT3args *, args);
3806 
3807         if (vp == NULL) {
3808                 error = ESTALE;
3809                 goto out;
3810         }
3811 
3812         if (is_system_labeled()) {
3813                 bslabel_t *clabel = req->rq_label;
3814 
3815                 ASSERT(clabel != NULL);
3816                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3817                     "got client label from request(1)", struct svc_req *, req);
3818 
3819                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3820                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3821                             exi)) {
3822                                 resp->status = NFS3ERR_ACCES;
3823                                 goto out1;
3824                         }
3825                 }
3826         }
3827 
3828         error = VFS_STATVFS(vp->v_vfsp, &sb);
3829 
3830         va.va_mask = AT_ALL;
3831         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3832 
3833         if (error)
3834                 goto out;
3835 
3836         resp->status = NFS3_OK;
3837         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3838         if (sb.f_blocks != (fsblkcnt64_t)-1)
3839                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3840         else
3841                 resp->resok.tbytes = (size3)sb.f_blocks;
3842         if (sb.f_bfree != (fsblkcnt64_t)-1)
3843                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3844         else
3845                 resp->resok.fbytes = (size3)sb.f_bfree;
3846         if (sb.f_bavail != (fsblkcnt64_t)-1)
3847                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3848         else
3849                 resp->resok.abytes = (size3)sb.f_bavail;
3850         resp->resok.tfiles = (size3)sb.f_files;
3851         resp->resok.ffiles = (size3)sb.f_ffree;
3852         resp->resok.afiles = (size3)sb.f_favail;
3853         resp->resok.invarsec = 0;
3854 
3855         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3856             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3857             FSSTAT3res *, resp);
3858         VN_RELE(vp);
3859 
3860         return;
3861 
3862 out:
3863         if (curthread->t_flag & T_WOULDBLOCK) {
3864                 curthread->t_flag &= ~T_WOULDBLOCK;
3865                 resp->status = NFS3ERR_JUKEBOX;
3866         } else
3867                 resp->status = puterrno3(error);
3868 out1:
3869         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3870             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3871             FSSTAT3res *, resp);
3872 
3873         if (vp != NULL)
3874                 VN_RELE(vp);
3875         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3876 }
3877 
3878 void *
3879 rfs3_fsstat_getfh(FSSTAT3args *args)
3880 {
3881         return (&args->fsroot);
3882 }
3883 
3884 /* ARGSUSED */
3885 void
3886 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3887     struct svc_req *req, cred_t *cr, bool_t ro)
3888 {
3889         vnode_t *vp;
3890         struct vattr *vap;
3891         struct vattr va;
3892         uint32_t xfer_size;
3893         ulong_t l = 0;
3894         int error;
3895 
3896         vp = nfs3_fhtovp(&args->fsroot, exi);
3897 
3898         DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3899             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3900             FSINFO3args *, args);
3901 
3902         if (vp == NULL) {
3903                 if (curthread->t_flag & T_WOULDBLOCK) {
3904                         curthread->t_flag &= ~T_WOULDBLOCK;
3905                         resp->status = NFS3ERR_JUKEBOX;
3906                 } else
3907                         resp->status = NFS3ERR_STALE;
3908                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3909                 goto out;
3910         }
3911 
3912         if (is_system_labeled()) {
3913                 bslabel_t *clabel = req->rq_label;
3914 
3915                 ASSERT(clabel != NULL);
3916                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3917                     "got client label from request(1)", struct svc_req *, req);
3918 
3919                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3920                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3921                             exi)) {
3922                                 resp->status = NFS3ERR_STALE;
3923                                 vattr_to_post_op_attr(NULL,
3924                                     &resp->resfail.obj_attributes);
3925                                 goto out;
3926                         }
3927                 }
3928         }
3929 
3930         va.va_mask = AT_ALL;
3931         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3932 
3933         resp->status = NFS3_OK;
3934         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3935         xfer_size = rfs3_tsize(req);
3936         resp->resok.rtmax = xfer_size;
3937         resp->resok.rtpref = xfer_size;
3938         resp->resok.rtmult = DEV_BSIZE;
3939         resp->resok.wtmax = xfer_size;
3940         resp->resok.wtpref = xfer_size;
3941         resp->resok.wtmult = DEV_BSIZE;
3942         resp->resok.dtpref = MAXBSIZE;
3943 
3944         /*
3945          * Large file spec: want maxfilesize based on limit of
3946          * underlying filesystem.  We can guess 2^31-1 if need be.
3947          */
3948         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3949         if (error) {
3950                 resp->status = puterrno3(error);
3951                 goto out;
3952         }
3953 
3954         /*
3955          * If the underlying file system does not support _PC_FILESIZEBITS,
3956          * return a reasonable default. Note that error code on VOP_PATHCONF
3957          * will be 0, even if the underlying file system does not support
3958          * _PC_FILESIZEBITS.
3959          */
3960         if (l == (ulong_t)-1) {
3961                 resp->resok.maxfilesize = MAXOFF32_T;
3962         } else {
3963                 if (l >= (sizeof (uint64_t) * 8))
3964                         resp->resok.maxfilesize = INT64_MAX;
3965                 else
3966                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3967         }
3968 
3969         resp->resok.time_delta.seconds = 0;
3970         resp->resok.time_delta.nseconds = 1000;
3971         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3972             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3973 
3974         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3975             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3976             FSINFO3res *, resp);
3977 
3978         VN_RELE(vp);
3979 
3980         return;
3981 
3982 out:
3983         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3984             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
3985             FSINFO3res *, resp);
3986         if (vp != NULL)
3987                 VN_RELE(vp);
3988 }
3989 
3990 void *
3991 rfs3_fsinfo_getfh(FSINFO3args *args)
3992 {
3993         return (&args->fsroot);
3994 }
3995 
3996 /* ARGSUSED */
3997 void
3998 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3999     struct svc_req *req, cred_t *cr, bool_t ro)
4000 {
4001         int error;
4002         vnode_t *vp;
4003         struct vattr *vap;
4004         struct vattr va;
4005         ulong_t val;
4006 
4007         vap = NULL;
4008 
4009         vp = nfs3_fhtovp(&args->object, exi);
4010 
4011         DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4012             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4013             PATHCONF3args *, args);
4014 
4015         if (vp == NULL) {
4016                 error = ESTALE;
4017                 goto out;
4018         }
4019 
4020         if (is_system_labeled()) {
4021                 bslabel_t *clabel = req->rq_label;
4022 
4023                 ASSERT(clabel != NULL);
4024                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4025                     "got client label from request(1)", struct svc_req *, req);
4026 
4027                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4028                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4029                             exi)) {
4030                                 resp->status = NFS3ERR_ACCES;
4031                                 goto out1;
4032                         }
4033                 }
4034         }
4035 
4036         va.va_mask = AT_ALL;
4037         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4038 
4039         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4040         if (error)
4041                 goto out;
4042         resp->resok.info.link_max = (uint32)val;
4043 
4044         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4045         if (error)
4046                 goto out;
4047         resp->resok.info.name_max = (uint32)val;
4048 
4049         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4050         if (error)
4051                 goto out;
4052         if (val == 1)
4053                 resp->resok.info.no_trunc = TRUE;
4054         else
4055                 resp->resok.info.no_trunc = FALSE;
4056 
4057         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4058         if (error)
4059                 goto out;
4060         if (val == 1)
4061                 resp->resok.info.chown_restricted = TRUE;
4062         else
4063                 resp->resok.info.chown_restricted = FALSE;
4064 
4065         resp->status = NFS3_OK;
4066         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4067         resp->resok.info.case_insensitive = FALSE;
4068         resp->resok.info.case_preserving = TRUE;
4069         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4070             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4071             PATHCONF3res *, resp);
4072         VN_RELE(vp);
4073         return;
4074 
4075 out:
4076         if (curthread->t_flag & T_WOULDBLOCK) {
4077                 curthread->t_flag &= ~T_WOULDBLOCK;
4078                 resp->status = NFS3ERR_JUKEBOX;
4079         } else
4080                 resp->status = puterrno3(error);
4081 out1:
4082         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4083             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4084             PATHCONF3res *, resp);
4085         if (vp != NULL)
4086                 VN_RELE(vp);
4087         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4088 }
4089 
4090 void *
4091 rfs3_pathconf_getfh(PATHCONF3args *args)
4092 {
4093         return (&args->object);
4094 }
4095 
4096 void
4097 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4098     struct svc_req *req, cred_t *cr, bool_t ro)
4099 {
4100         nfs3_srv_t *ns;
4101         int error;
4102         vnode_t *vp;
4103         struct vattr *bvap;
4104         struct vattr bva;
4105         struct vattr *avap;
4106         struct vattr ava;
4107 
4108         bvap = NULL;
4109         avap = NULL;
4110 
4111         vp = nfs3_fhtovp(&args->file, exi);
4112 
4113         DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4114             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4115             COMMIT3args *, args);
4116 
4117         if (vp == NULL) {
4118                 error = ESTALE;
4119                 goto out;
4120         }
4121 
4122         ns = zone_getspecific(rfs3_zone_key, curzone);
4123         bva.va_mask = AT_ALL;
4124         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4125 
4126         /*
4127          * If we can't get the attributes, then we can't do the
4128          * right access checking.  So, we'll fail the request.
4129          */
4130         if (error)
4131                 goto out;
4132 
4133         bvap = &bva;
4134 
4135         if (rdonly(ro, vp)) {
4136                 resp->status = NFS3ERR_ROFS;
4137                 goto out1;
4138         }
4139 
4140         if (vp->v_type != VREG) {
4141                 resp->status = NFS3ERR_INVAL;
4142                 goto out1;
4143         }
4144 
4145         if (is_system_labeled()) {
4146                 bslabel_t *clabel = req->rq_label;
4147 
4148                 ASSERT(clabel != NULL);
4149                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4150                     "got client label from request(1)", struct svc_req *, req);
4151 
4152                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4153                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4154                             exi)) {
4155                                 resp->status = NFS3ERR_ACCES;
4156                                 goto out1;
4157                         }
4158                 }
4159         }
4160 
4161         if (crgetuid(cr) != bva.va_uid &&
4162             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4163                 goto out;
4164 
4165         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4166 
4167         ava.va_mask = AT_ALL;
4168         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4169 
4170         if (error)
4171                 goto out;
4172 
4173         resp->status = NFS3_OK;
4174         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4175         resp->resok.verf = ns->write3verf;
4176 
4177         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4178             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4179             COMMIT3res *, resp);
4180 
4181         VN_RELE(vp);
4182 
4183         return;
4184 
4185 out:
4186         if (curthread->t_flag & T_WOULDBLOCK) {
4187                 curthread->t_flag &= ~T_WOULDBLOCK;
4188                 resp->status = NFS3ERR_JUKEBOX;
4189         } else
4190                 resp->status = puterrno3(error);
4191 out1:
4192         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4193             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4194             COMMIT3res *, resp);
4195 
4196         if (vp != NULL)
4197                 VN_RELE(vp);
4198         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4199 }
4200 
4201 void *
4202 rfs3_commit_getfh(COMMIT3args *args)
4203 {
4204         return (&args->file);
4205 }
4206 
4207 static int
4208 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4209 {
4210 
4211         vap->va_mask = 0;
4212 
4213         if (sap->mode.set_it) {
4214                 vap->va_mode = (mode_t)sap->mode.mode;
4215                 vap->va_mask |= AT_MODE;
4216         }
4217         if (sap->uid.set_it) {
4218                 vap->va_uid = (uid_t)sap->uid.uid;
4219                 vap->va_mask |= AT_UID;
4220         }
4221         if (sap->gid.set_it) {
4222                 vap->va_gid = (gid_t)sap->gid.gid;
4223                 vap->va_mask |= AT_GID;
4224         }
4225         if (sap->size.set_it) {
4226                 if (sap->size.size > (size3)((u_longlong_t)-1))
4227                         return (EINVAL);
4228                 vap->va_size = sap->size.size;
4229                 vap->va_mask |= AT_SIZE;
4230         }
4231         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4232 #ifndef _LP64
4233                 /* check time validity */
4234                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4235                         return (EOVERFLOW);
4236 #endif
4237                 /*
4238                  * nfs protocol defines times as unsigned so don't extend sign,
4239                  * unless sysadmin set nfs_allow_preepoch_time.
4240                  */
4241                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4242                     sap->atime.atime.seconds);
4243                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4244                 vap->va_mask |= AT_ATIME;
4245         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4246                 gethrestime(&vap->va_atime);
4247                 vap->va_mask |= AT_ATIME;
4248         }
4249         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4250 #ifndef _LP64
4251                 /* check time validity */
4252                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4253                         return (EOVERFLOW);
4254 #endif
4255                 /*
4256                  * nfs protocol defines times as unsigned so don't extend sign,
4257                  * unless sysadmin set nfs_allow_preepoch_time.
4258                  */
4259                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4260                     sap->mtime.mtime.seconds);
4261                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4262                 vap->va_mask |= AT_MTIME;
4263         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4264                 gethrestime(&vap->va_mtime);
4265                 vap->va_mask |= AT_MTIME;
4266         }
4267 
4268         return (0);
4269 }
4270 
4271 static const ftype3 vt_to_nf3[] = {
4272         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4273 };
4274 
4275 static int
4276 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4277 {
4278 
4279         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4280         /* Return error if time or size overflow */
4281         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4282                 return (EOVERFLOW);
4283         }
4284         fap->type = vt_to_nf3[vap->va_type];
4285         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4286         fap->nlink = (uint32)vap->va_nlink;
4287         if (vap->va_uid == UID_NOBODY)
4288                 fap->uid = (uid3)NFS_UID_NOBODY;
4289         else
4290                 fap->uid = (uid3)vap->va_uid;
4291         if (vap->va_gid == GID_NOBODY)
4292                 fap->gid = (gid3)NFS_GID_NOBODY;
4293         else
4294                 fap->gid = (gid3)vap->va_gid;
4295         fap->size = (size3)vap->va_size;
4296         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4297         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4298         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4299         fap->fsid = (uint64)vap->va_fsid;
4300         fap->fileid = (fileid3)vap->va_nodeid;
4301         fap->atime.seconds = vap->va_atime.tv_sec;
4302         fap->atime.nseconds = vap->va_atime.tv_nsec;
4303         fap->mtime.seconds = vap->va_mtime.tv_sec;
4304         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4305         fap->ctime.seconds = vap->va_ctime.tv_sec;
4306         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4307         return (0);
4308 }
4309 
4310 static int
4311 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4312 {
4313 
4314         /* Return error if time or size overflow */
4315         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4316             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4317             NFS3_SIZE_OK(vap->va_size))) {
4318                 return (EOVERFLOW);
4319         }
4320         wccap->size = (size3)vap->va_size;
4321         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4322         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4323         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4324         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4325         return (0);
4326 }
4327 
4328 static void
4329 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4330 {
4331 
4332         /* don't return attrs if time overflow */
4333         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4334                 poap->attributes = TRUE;
4335         } else
4336                 poap->attributes = FALSE;
4337 }
4338 
4339 void
4340 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4341 {
4342 
4343         /* don't return attrs if time overflow */
4344         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4345                 poap->attributes = TRUE;
4346         } else
4347                 poap->attributes = FALSE;
4348 }
4349 
4350 static void
4351 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4352 {
4353         vattr_to_pre_op_attr(bvap, &wccp->before);
4354         vattr_to_post_op_attr(avap, &wccp->after);
4355 }
4356 
4357 static int
4358 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4359 {
4360         struct clist    *wcl;
4361         int             wlist_len;
4362         count3          count = rok->count;
4363 
4364         wcl = args->wlist;
4365         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4366                 return (FALSE);
4367 
4368         wcl = args->wlist;
4369         rok->wlist_len = wlist_len;
4370         rok->wlist = wcl;
4371         return (TRUE);
4372 }
4373 
4374 /* ARGSUSED */
4375 static void *
4376 rfs3_zone_init(zoneid_t zoneid)
4377 {
4378         nfs3_srv_t *ns;
4379         struct rfs3_verf_overlay {
4380                 uint_t id; /* a "unique" identifier */
4381                 int ts; /* a unique timestamp */
4382         } *verfp;
4383         timestruc_t now;
4384 
4385         ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4386 
4387         /*
4388          * The following algorithm attempts to find a unique verifier
4389          * to be used as the write verifier returned from the server
4390          * to the client.  It is important that this verifier change
4391          * whenever the server reboots.  Of secondary importance, it
4392          * is important for the verifier to be unique between two
4393          * different servers.
4394          *
4395          * Thus, an attempt is made to use the system hostid and the
4396          * current time in seconds when the nfssrv kernel module is
4397          * loaded.  It is assumed that an NFS server will not be able
4398          * to boot and then to reboot in less than a second.  If the
4399          * hostid has not been set, then the current high resolution
4400          * time is used.  This will ensure different verifiers each
4401          * time the server reboots and minimize the chances that two
4402          * different servers will have the same verifier.
4403          */
4404 
4405 #ifndef lint
4406         /*
4407          * We ASSERT that this constant logic expression is
4408          * always true because in the past, it wasn't.
4409          */
4410         ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4411 #endif
4412 
4413         gethrestime(&now);
4414         verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4415         verfp->ts = (int)now.tv_sec;
4416         verfp->id = zone_get_hostid(NULL);
4417 
4418         if (verfp->id == 0)
4419                 verfp->id = (uint_t)now.tv_nsec;
4420 
4421         return (ns);
4422 }
4423 
4424 /* ARGSUSED */
4425 static void
4426 rfs3_zone_fini(zoneid_t zoneid, void *data)
4427 {
4428         nfs3_srv_t *ns = data;
4429 
4430         kmem_free(ns, sizeof (*ns));
4431 }
4432 
4433 void
4434 rfs3_srvrinit(void)
4435 {
4436         nfs3_srv_caller_id = fs_new_caller_id();
4437         zone_key_create(&rfs3_zone_key, rfs3_zone_init, NULL, rfs3_zone_fini);
4438 }
4439 
/*
 * Counterpart of rfs3_srvrinit().  Currently a no-op.
 *
 * NOTE(review): the zone key created in rfs3_srvrinit() is not deleted
 * here -- confirm that this is intentional.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to tear down. */
}