1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2018 Nexenta Systems, Inc.
  24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 
  28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /* All Rights Reserved */
  30 
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/buf.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/uio.h>
  40 #include <sys/errno.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/statvfs.h>
  43 #include <sys/kmem.h>
  44 #include <sys/dirent.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/systeminfo.h>
  48 #include <sys/flock.h>
  49 #include <sys/nbmlock.h>
  50 #include <sys/policy.h>
  51 #include <sys/sdt.h>
  52 
  53 #include <rpc/types.h>
  54 #include <rpc/auth.h>
  55 #include <rpc/svc.h>
  56 #include <rpc/rpc_rdma.h>
  57 
  58 #include <nfs/nfs.h>
  59 #include <nfs/export.h>
  60 #include <nfs/nfs_cmd.h>
  61 
  62 #include <sys/strsubr.h>
  63 #include <sys/tsol/label.h>
  64 #include <sys/tsol/tndb.h>
  65 
  66 #include <sys/zone.h>
  67 
  68 #include <inet/ip.h>
  69 #include <inet/ip6.h>
  70 
  71 /*
  72  * Zone global variables of NFSv3 server
  73  */
  74 typedef struct nfs3_srv {
  75         writeverf3      write3verf;
  76 } nfs3_srv_t;
  77 
  78 /*
  79  * These are the interface routines for the server side of the
  80  * Network File System.  See the NFS version 3 protocol specification
  81  * for a description of this interface.
  82  */
  83 
  84 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  85 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  86 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  87 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  88 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  89 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  90 
  91 extern int nfs_loaned_buffers;
  92 
  93 u_longlong_t nfs3_srv_caller_id;
  94 
  95 static nfs3_srv_t *
  96 nfs3_get_srv(void)
  97 {
  98         nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
  99         nfs3_srv_t *srv = ng->nfs3_srv;
 100         ASSERT(srv != NULL);
 101         return (srv);
 102 }
 103 
 104 /* ARGSUSED */
 105 void
 106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
 107     struct svc_req *req, cred_t *cr, bool_t ro)
 108 {
 109         int error;
 110         vnode_t *vp;
 111         struct vattr va;
 112 
 113         vp = nfs3_fhtovp(&args->object, exi);
 114 
 115         DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
 116             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 117             GETATTR3args *, args);
 118 
 119         if (vp == NULL) {
 120                 error = ESTALE;
 121                 goto out;
 122         }
 123 
 124         va.va_mask = AT_ALL;
 125         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 126 
 127         if (!error) {
 128                 /* Lie about the object type for a referral */
 129                 if (vn_is_nfs_reparse(vp, cr))
 130                         va.va_type = VLNK;
 131 
 132                 /* overflow error if time or size is out of range */
 133                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 134                 if (error)
 135                         goto out;
 136                 resp->status = NFS3_OK;
 137 
 138                 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 139                     cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 140                     GETATTR3res *, resp);
 141 
 142                 VN_RELE(vp);
 143 
 144                 return;
 145         }
 146 
 147 out:
 148         if (curthread->t_flag & T_WOULDBLOCK) {
 149                 curthread->t_flag &= ~T_WOULDBLOCK;
 150                 resp->status = NFS3ERR_JUKEBOX;
 151         } else
 152                 resp->status = puterrno3(error);
 153 
 154         DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
 155             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 156             GETATTR3res *, resp);
 157 
 158         if (vp != NULL)
 159                 VN_RELE(vp);
 160 }
 161 
 162 void *
 163 rfs3_getattr_getfh(GETATTR3args *args)
 164 {
 165 
 166         return (&args->object);
 167 }
 168 
 169 void
 170 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 171     struct svc_req *req, cred_t *cr, bool_t ro)
 172 {
 173         int error;
 174         vnode_t *vp;
 175         struct vattr *bvap;
 176         struct vattr bva;
 177         struct vattr *avap;
 178         struct vattr ava;
 179         int flag;
 180         int in_crit = 0;
 181         struct flock64 bf;
 182         caller_context_t ct;
 183 
 184         bvap = NULL;
 185         avap = NULL;
 186 
 187         vp = nfs3_fhtovp(&args->object, exi);
 188 
 189         DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
 190             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 191             SETATTR3args *, args);
 192 
 193         if (vp == NULL) {
 194                 error = ESTALE;
 195                 goto out;
 196         }
 197 
 198         error = sattr3_to_vattr(&args->new_attributes, &ava);
 199         if (error)
 200                 goto out;
 201 
 202         if (is_system_labeled()) {
 203                 bslabel_t *clabel = req->rq_label;
 204 
 205                 ASSERT(clabel != NULL);
 206                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 207                     "got client label from request(1)", struct svc_req *, req);
 208 
 209                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 210                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 211                             exi)) {
 212                                 resp->status = NFS3ERR_ACCES;
 213                                 goto out1;
 214                         }
 215                 }
 216         }
 217 
 218         /*
 219          * We need to specially handle size changes because of
 220          * possible conflicting NBMAND locks. Get into critical
 221          * region before VOP_GETATTR, so the size attribute is
 222          * valid when checking conflicts.
 223          *
 224          * Also, check to see if the v4 side of the server has
 225          * delegated this file.  If so, then we return JUKEBOX to
 226          * allow the client to retrasmit its request.
 227          */
 228         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 229                 if (nbl_need_check(vp)) {
 230                         nbl_start_crit(vp, RW_READER);
 231                         in_crit = 1;
 232                 }
 233         }
 234 
 235         bva.va_mask = AT_ALL;
 236         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 237 
 238         /*
 239          * If we can't get the attributes, then we can't do the
 240          * right access checking.  So, we'll fail the request.
 241          */
 242         if (error)
 243                 goto out;
 244 
 245         bvap = &bva;
 246 
 247         if (rdonly(ro, vp)) {
 248                 resp->status = NFS3ERR_ROFS;
 249                 goto out1;
 250         }
 251 
 252         if (args->guard.check &&
 253             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 254             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 255                 resp->status = NFS3ERR_NOT_SYNC;
 256                 goto out1;
 257         }
 258 
 259         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 260                 flag = ATTR_UTIME;
 261         else
 262                 flag = 0;
 263 
 264         /*
 265          * If the filesystem is exported with nosuid, then mask off
 266          * the setuid and setgid bits.
 267          */
 268         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 269             (exi->exi_export.ex_flags & EX_NOSUID))
 270                 ava.va_mode &= ~(VSUID | VSGID);
 271 
 272         ct.cc_sysid = 0;
 273         ct.cc_pid = 0;
 274         ct.cc_caller_id = nfs3_srv_caller_id;
 275         ct.cc_flags = CC_DONTBLOCK;
 276 
 277         /*
 278          * We need to specially handle size changes because it is
 279          * possible for the client to create a file with modes
 280          * which indicate read-only, but with the file opened for
 281          * writing.  If the client then tries to set the size of
 282          * the file, then the normal access checking done in
 283          * VOP_SETATTR would prevent the client from doing so,
 284          * although it should be legal for it to do so.  To get
 285          * around this, we do the access checking for ourselves
 286          * and then use VOP_SPACE which doesn't do the access
 287          * checking which VOP_SETATTR does. VOP_SPACE can only
 288          * operate on VREG files, let VOP_SETATTR handle the other
 289          * extremely rare cases.
 290          * Also the client should not be allowed to change the
 291          * size of the file if there is a conflicting non-blocking
 292          * mandatory lock in the region the change.
 293          */
 294         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 295                 if (in_crit) {
 296                         u_offset_t offset;
 297                         ssize_t length;
 298 
 299                         if (ava.va_size < bva.va_size) {
 300                                 offset = ava.va_size;
 301                                 length = bva.va_size - ava.va_size;
 302                         } else {
 303                                 offset = bva.va_size;
 304                                 length = ava.va_size - bva.va_size;
 305                         }
 306                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 307                             NULL)) {
 308                                 error = EACCES;
 309                                 goto out;
 310                         }
 311                 }
 312 
 313                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 314                         ava.va_mask &= ~AT_SIZE;
 315                         bf.l_type = F_WRLCK;
 316                         bf.l_whence = 0;
 317                         bf.l_start = (off64_t)ava.va_size;
 318                         bf.l_len = 0;
 319                         bf.l_sysid = 0;
 320                         bf.l_pid = 0;
 321                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 322                             (offset_t)ava.va_size, cr, &ct);
 323                 }
 324         }
 325 
 326         if (!error && ava.va_mask)
 327                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 328 
 329         /* check if a monitor detected a delegation conflict */
 330         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 331                 resp->status = NFS3ERR_JUKEBOX;
 332                 goto out1;
 333         }
 334 
 335         ava.va_mask = AT_ALL;
 336         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 337 
 338         /*
 339          * Force modified metadata out to stable storage.
 340          */
 341         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 342 
 343         if (error)
 344                 goto out;
 345 
 346         if (in_crit)
 347                 nbl_end_crit(vp);
 348 
 349         resp->status = NFS3_OK;
 350         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 351 
 352         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 353             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 354             SETATTR3res *, resp);
 355 
 356         VN_RELE(vp);
 357 
 358         return;
 359 
 360 out:
 361         if (curthread->t_flag & T_WOULDBLOCK) {
 362                 curthread->t_flag &= ~T_WOULDBLOCK;
 363                 resp->status = NFS3ERR_JUKEBOX;
 364         } else
 365                 resp->status = puterrno3(error);
 366 out1:
 367         DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
 368             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 369             SETATTR3res *, resp);
 370 
 371         if (vp != NULL) {
 372                 if (in_crit)
 373                         nbl_end_crit(vp);
 374                 VN_RELE(vp);
 375         }
 376         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 377 }
 378 
 379 void *
 380 rfs3_setattr_getfh(SETATTR3args *args)
 381 {
 382 
 383         return (&args->object);
 384 }
 385 
 386 /* ARGSUSED */
 387 void
 388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 389     struct svc_req *req, cred_t *cr, bool_t ro)
 390 {
 391         int error;
 392         vnode_t *vp;
 393         vnode_t *dvp;
 394         struct vattr *vap;
 395         struct vattr va;
 396         struct vattr *dvap;
 397         struct vattr dva;
 398         nfs_fh3 *fhp;
 399         struct sec_ol sec = {0, 0};
 400         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 401         struct sockaddr *ca;
 402         char *name = NULL;
 403 
 404         dvap = NULL;
 405 
 406         if (exi != NULL)
 407                 exi_hold(exi);
 408 
 409         /*
 410          * Allow lookups from the root - the default
 411          * location of the public filehandle.
 412          */
 413         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 414                 dvp = ZONE_ROOTVP();
 415                 VN_HOLD(dvp);
 416 
 417                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 418                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 419                     LOOKUP3args *, args);
 420         } else {
 421                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 422 
 423                 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
 424                     cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 425                     LOOKUP3args *, args);
 426 
 427                 if (dvp == NULL) {
 428                         error = ESTALE;
 429                         goto out;
 430                 }
 431         }
 432 
 433         dva.va_mask = AT_ALL;
 434         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 435 
 436         if (args->what.name == nfs3nametoolong) {
 437                 resp->status = NFS3ERR_NAMETOOLONG;
 438                 goto out1;
 439         }
 440 
 441         if (args->what.name == NULL || *(args->what.name) == '\0') {
 442                 resp->status = NFS3ERR_ACCES;
 443                 goto out1;
 444         }
 445 
 446         fhp = &args->what.dir;
 447         ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
 448         if (strcmp(args->what.name, "..") == 0 &&
 449             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 450                 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 451                     ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
 452                         /*
 453                          * special case for ".." and 'nohide'exported root
 454                          */
 455                         if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 456                                 resp->status = NFS3ERR_ACCES;
 457                                 goto out1;
 458                         }
 459                 } else {
 460                         resp->status = NFS3ERR_NOENT;
 461                         goto out1;
 462                 }
 463         }
 464 
 465         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 466         name = nfscmd_convname(ca, exi, args->what.name,
 467             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 468 
 469         if (name == NULL) {
 470                 resp->status = NFS3ERR_ACCES;
 471                 goto out1;
 472         }
 473 
 474         /*
 475          * If the public filehandle is used then allow
 476          * a multi-component lookup
 477          */
 478         if (PUBLIC_FH3(&args->what.dir)) {
 479                 publicfh_flag = TRUE;
 480 
 481                 exi_rele(exi);
 482 
 483                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 484                     &exi, &sec);
 485 
 486                 /*
 487                  * Since WebNFS may bypass MOUNT, we need to ensure this
 488                  * request didn't come from an unlabeled admin_low client.
 489                  */
 490                 if (is_system_labeled() && error == 0) {
 491                         int             addr_type;
 492                         void            *ipaddr;
 493                         tsol_tpc_t      *tp;
 494 
 495                         if (ca->sa_family == AF_INET) {
 496                                 addr_type = IPV4_VERSION;
 497                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 498                         } else if (ca->sa_family == AF_INET6) {
 499                                 addr_type = IPV6_VERSION;
 500                                 ipaddr = &((struct sockaddr_in6 *)
 501                                     ca)->sin6_addr;
 502                         }
 503                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 504                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 505                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 506                             SUN_CIPSO) {
 507                                 VN_RELE(vp);
 508                                 error = EACCES;
 509                         }
 510                         if (tp != NULL)
 511                                 TPC_RELE(tp);
 512                 }
 513         } else {
 514                 error = VOP_LOOKUP(dvp, name, &vp,
 515                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 516         }
 517 
 518         if (name != args->what.name)
 519                 kmem_free(name, MAXPATHLEN + 1);
 520 
 521         if (error == 0 && vn_ismntpt(vp)) {
 522                 error = rfs_cross_mnt(&vp, &exi);
 523                 if (error)
 524                         VN_RELE(vp);
 525         }
 526 
 527         if (is_system_labeled() && error == 0) {
 528                 bslabel_t *clabel = req->rq_label;
 529 
 530                 ASSERT(clabel != NULL);
 531                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 532                     "got client label from request(1)", struct svc_req *, req);
 533 
 534                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 535                         if (!do_rfs_label_check(clabel, dvp,
 536                             DOMINANCE_CHECK, exi)) {
 537                                 VN_RELE(vp);
 538                                 error = EACCES;
 539                         }
 540                 }
 541         }
 542 
 543         dva.va_mask = AT_ALL;
 544         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 545 
 546         if (error)
 547                 goto out;
 548 
 549         if (sec.sec_flags & SEC_QUERY) {
 550                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 551         } else {
 552                 error = makefh3(&resp->resok.object, vp, exi);
 553                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 554                         auth_weak = TRUE;
 555         }
 556 
 557         if (error) {
 558                 VN_RELE(vp);
 559                 goto out;
 560         }
 561 
 562         va.va_mask = AT_ALL;
 563         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 564 
 565         exi_rele(exi);
 566         VN_RELE(vp);
 567 
 568         resp->status = NFS3_OK;
 569         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 570         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 571 
 572         /*
 573          * If it's public fh, no 0x81, and client's flavor is
 574          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 575          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 576          */
 577         if (auth_weak)
 578                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 579 
 580         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 581             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 582             LOOKUP3res *, resp);
 583         VN_RELE(dvp);
 584 
 585         return;
 586 
 587 out:
 588         if (curthread->t_flag & T_WOULDBLOCK) {
 589                 curthread->t_flag &= ~T_WOULDBLOCK;
 590                 resp->status = NFS3ERR_JUKEBOX;
 591         } else
 592                 resp->status = puterrno3(error);
 593 out1:
 594         DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
 595             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
 596             LOOKUP3res *, resp);
 597 
 598         if (exi != NULL)
 599                 exi_rele(exi);
 600 
 601         if (dvp != NULL)
 602                 VN_RELE(dvp);
 603         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 604 
 605 }
 606 
 607 void *
 608 rfs3_lookup_getfh(LOOKUP3args *args)
 609 {
 610 
 611         return (&args->what.dir);
 612 }
 613 
 614 /* ARGSUSED */
 615 void
 616 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 617     struct svc_req *req, cred_t *cr, bool_t ro)
 618 {
 619         int error;
 620         vnode_t *vp;
 621         struct vattr *vap;
 622         struct vattr va;
 623         int checkwriteperm;
 624         boolean_t dominant_label = B_FALSE;
 625         boolean_t equal_label = B_FALSE;
 626         boolean_t admin_low_client;
 627 
 628         vap = NULL;
 629 
 630         vp = nfs3_fhtovp(&args->object, exi);
 631 
 632         DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
 633             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 634             ACCESS3args *, args);
 635 
 636         if (vp == NULL) {
 637                 error = ESTALE;
 638                 goto out;
 639         }
 640 
 641         /*
 642          * If the file system is exported read only, it is not appropriate
 643          * to check write permissions for regular files and directories.
 644          * Special files are interpreted by the client, so the underlying
 645          * permissions are sent back to the client for interpretation.
 646          */
 647         if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
 648                 checkwriteperm = 0;
 649         else
 650                 checkwriteperm = 1;
 651 
 652         /*
 653          * We need the mode so that we can correctly determine access
 654          * permissions relative to a mandatory lock file.  Access to
 655          * mandatory lock files is denied on the server, so it might
 656          * as well be reflected to the server during the open.
 657          */
 658         va.va_mask = AT_MODE;
 659         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 660         if (error)
 661                 goto out;
 662 
 663         vap = &va;
 664 
 665         resp->resok.access = 0;
 666 
 667         if (is_system_labeled()) {
 668                 bslabel_t *clabel = req->rq_label;
 669 
 670                 ASSERT(clabel != NULL);
 671                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 672                     "got client label from request(1)", struct svc_req *, req);
 673 
 674                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 675                         if ((equal_label = do_rfs_label_check(clabel, vp,
 676                             EQUALITY_CHECK, exi)) == B_FALSE) {
 677                                 dominant_label = do_rfs_label_check(clabel,
 678                                     vp, DOMINANCE_CHECK, exi);
 679                         } else
 680                                 dominant_label = B_TRUE;
 681                         admin_low_client = B_FALSE;
 682                 } else
 683                         admin_low_client = B_TRUE;
 684         }
 685 
 686         if (args->access & ACCESS3_READ) {
 687                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 688                 if (error) {
 689                         if (curthread->t_flag & T_WOULDBLOCK)
 690                                 goto out;
 691                 } else if (!MANDLOCK(vp, va.va_mode) &&
 692                     (!is_system_labeled() || admin_low_client ||
 693                     dominant_label))
 694                         resp->resok.access |= ACCESS3_READ;
 695         }
 696         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 697                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 698                 if (error) {
 699                         if (curthread->t_flag & T_WOULDBLOCK)
 700                                 goto out;
 701                 } else if (!is_system_labeled() || admin_low_client ||
 702                     dominant_label)
 703                         resp->resok.access |= ACCESS3_LOOKUP;
 704         }
 705         if (checkwriteperm &&
 706             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 707                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 708                 if (error) {
 709                         if (curthread->t_flag & T_WOULDBLOCK)
 710                                 goto out;
 711                 } else if (!MANDLOCK(vp, va.va_mode) &&
 712                     (!is_system_labeled() || admin_low_client || equal_label)) {
 713                         resp->resok.access |=
 714                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 715                 }
 716         }
 717         if (checkwriteperm &&
 718             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 719                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 720                 if (error) {
 721                         if (curthread->t_flag & T_WOULDBLOCK)
 722                                 goto out;
 723                 } else if (!is_system_labeled() || admin_low_client ||
 724                     equal_label)
 725                         resp->resok.access |= ACCESS3_DELETE;
 726         }
 727         if (args->access & ACCESS3_EXECUTE) {
 728                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 729                 if (error) {
 730                         if (curthread->t_flag & T_WOULDBLOCK)
 731                                 goto out;
 732                 } else if (!MANDLOCK(vp, va.va_mode) &&
 733                     (!is_system_labeled() || admin_low_client ||
 734                     dominant_label))
 735                         resp->resok.access |= ACCESS3_EXECUTE;
 736         }
 737 
 738         va.va_mask = AT_ALL;
 739         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 740 
 741         resp->status = NFS3_OK;
 742         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 743 
 744         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 745             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 746             ACCESS3res *, resp);
 747 
 748         VN_RELE(vp);
 749 
 750         return;
 751 
 752 out:
 753         if (curthread->t_flag & T_WOULDBLOCK) {
 754                 curthread->t_flag &= ~T_WOULDBLOCK;
 755                 resp->status = NFS3ERR_JUKEBOX;
 756         } else
 757                 resp->status = puterrno3(error);
 758         DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
 759             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 760             ACCESS3res *, resp);
 761         if (vp != NULL)
 762                 VN_RELE(vp);
 763         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 764 }
 765 
 766 void *
 767 rfs3_access_getfh(ACCESS3args *args)
 768 {
 769 
 770         return (&args->object);
 771 }
 772 
 773 /* ARGSUSED */
 774 void
 775 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 776     struct svc_req *req, cred_t *cr, bool_t ro)
 777 {
 778         int error;
 779         vnode_t *vp;
 780         struct vattr *vap;
 781         struct vattr va;
 782         struct iovec iov;
 783         struct uio uio;
 784         char *data;
 785         struct sockaddr *ca;
 786         char *name = NULL;
 787         int is_referral = 0;
 788 
 789         vap = NULL;
 790 
 791         vp = nfs3_fhtovp(&args->symlink, exi);
 792 
 793         DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
 794             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 795             READLINK3args *, args);
 796 
 797         if (vp == NULL) {
 798                 error = ESTALE;
 799                 goto out;
 800         }
 801 
 802         va.va_mask = AT_ALL;
 803         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 804         if (error)
 805                 goto out;
 806 
 807         vap = &va;
 808 
 809         /* We lied about the object type for a referral */
 810         if (vn_is_nfs_reparse(vp, cr))
 811                 is_referral = 1;
 812 
 813         if (vp->v_type != VLNK && !is_referral) {
 814                 resp->status = NFS3ERR_INVAL;
 815                 goto out1;
 816         }
 817 
 818         if (MANDLOCK(vp, va.va_mode)) {
 819                 resp->status = NFS3ERR_ACCES;
 820                 goto out1;
 821         }
 822 
 823         if (is_system_labeled()) {
 824                 bslabel_t *clabel = req->rq_label;
 825 
 826                 ASSERT(clabel != NULL);
 827                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 828                     "got client label from request(1)", struct svc_req *, req);
 829 
 830                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 831                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 832                             exi)) {
 833                                 resp->status = NFS3ERR_ACCES;
 834                                 goto out1;
 835                         }
 836                 }
 837         }
 838 
 839         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 840 
 841         if (is_referral) {
 842                 char *s;
 843                 size_t strsz;
 844 
 845                 /* Get an artificial symlink based on a referral */
 846                 s = build_symlink(vp, cr, &strsz);
 847                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 848                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 849                     vnode_t *, vp, char *, s);
 850                 if (s == NULL)
 851                         error = EINVAL;
 852                 else {
 853                         error = 0;
 854                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 855                         kmem_free(s, strsz);
 856                 }
 857 
 858         } else {
 859 
 860                 iov.iov_base = data;
 861                 iov.iov_len = MAXPATHLEN;
 862                 uio.uio_iov = &iov;
 863                 uio.uio_iovcnt = 1;
 864                 uio.uio_segflg = UIO_SYSSPACE;
 865                 uio.uio_extflg = UIO_COPY_CACHED;
 866                 uio.uio_loffset = 0;
 867                 uio.uio_resid = MAXPATHLEN;
 868 
 869                 error = VOP_READLINK(vp, &uio, cr, NULL);
 870 
 871                 if (!error)
 872                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 873         }
 874 
 875         va.va_mask = AT_ALL;
 876         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 877 
 878         /* Lie about object type again just to be consistent */
 879         if (is_referral && vap != NULL)
 880                 vap->va_type = VLNK;
 881 
 882 #if 0 /* notyet */
 883         /*
 884          * Don't do this.  It causes local disk writes when just
 885          * reading the file and the overhead is deemed larger
 886          * than the benefit.
 887          */
 888         /*
 889          * Force modified metadata out to stable storage.
 890          */
 891         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 892 #endif
 893 
 894         if (error) {
 895                 kmem_free(data, MAXPATHLEN + 1);
 896                 goto out;
 897         }
 898 
 899         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 900         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 901             MAXPATHLEN + 1);
 902 
 903         if (name == NULL) {
 904                 /*
 905                  * Even though the conversion failed, we return
 906                  * something. We just don't translate it.
 907                  */
 908                 name = data;
 909         }
 910 
 911         resp->status = NFS3_OK;
 912         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 913         resp->resok.data = name;
 914 
 915         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 916             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 917             READLINK3res *, resp);
 918         VN_RELE(vp);
 919 
 920         if (name != data)
 921                 kmem_free(data, MAXPATHLEN + 1);
 922 
 923         return;
 924 
 925 out:
 926         if (curthread->t_flag & T_WOULDBLOCK) {
 927                 curthread->t_flag &= ~T_WOULDBLOCK;
 928                 resp->status = NFS3ERR_JUKEBOX;
 929         } else
 930                 resp->status = puterrno3(error);
 931 out1:
 932         DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
 933             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 934             READLINK3res *, resp);
 935         if (vp != NULL)
 936                 VN_RELE(vp);
 937         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 938 }
 939 
     /*
      * Return the address of the symlink member of the READLINK3 arguments.
      * NOTE(review): going by the _getfh suffix this is presumably the file
      * handle the request operates on -- confirm against the dispatch table.
      */
 940 void *
 941 rfs3_readlink_getfh(READLINK3args *args)
 942 {
 943 
 944         return (&args->symlink);
 945 }
 946 
     /*
      * Release the link text attached to a READLINK3 reply.
      *
      * Only replies with status NFS3_OK carry a buffer in resok.data.  It is
      * freed as a MAXPATHLEN + 1 byte allocation, which matches the buffer
      * rfs3_readlink() allocates itself and the size it passes to
      * nfscmd_convname() (presumably the size of the converted name that
      * function returns -- confirm).
      */
 947 void
 948 rfs3_readlink_free(READLINK3res *resp)
 949 {
 950 
 951         if (resp->status == NFS3_OK)
 952                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 953 }
 954 
 955 /*
 956  * Server routine to handle read
 957  * May handle RDMA data as well as mblks
      *
      * The file handle in args->file is translated to a vnode and up to
      * args->count bytes (clamped to rfs3_tsize(req)) are read starting at
      * args->offset.  The data is returned in one of three ways:
      *   - through the client's RDMA write chunk list, when args->wlist is set;
      *   - in buffers loaned via VOP_REQZCBUF(), when nfs_loaned_buffers is
      *     set and RDMA is not in use;
      *   - in an mblk obtained from rfs_read_alloc() otherwise.
      *
      * The result is stored in *resp: resp->status plus either resok (data,
      * count, eof, attributes) or resfail (attributes only).  Three exit paths
      * are used: "done" for successful replies, "out" to turn `error' into an
      * NFS3 status, and "out1" when resp->status has already been set.
 958  */
 959 /* ARGSUSED */
 960 void
 961 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 962     struct svc_req *req, cred_t *cr, bool_t ro)
 963 {
 964         int error;
 965         vnode_t *vp;
 966         struct vattr *vap;
 967         struct vattr va;
 968         struct iovec iov, *iovp = NULL;
 969         int iovcnt;
 970         struct uio uio;
 971         u_offset_t offset;
 972         mblk_t *mp = NULL;
 973         int in_crit = 0;
 974         int need_rwunlock = 0;
 975         caller_context_t ct;
 976         int rdma_used = 0;
 977         int loaned_buffers;
 978         struct uio *uiop;
 979 
             /* No attributes to report until VOP_GETATTR() succeeds below. */
 980         vap = NULL;
 981 
 982         vp = nfs3_fhtovp(&args->file, exi);
 983 
 984         DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
 985             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
 986             READ3args *, args);
 987 
 988 
 989         if (vp == NULL) {
 990                 error = ESTALE;
 991                 goto out;
 992         }
 993 
             /*
              * A write chunk list from the client selects the RDMA reply path.
              * The request is rejected if args->count exceeds what clist_len()
              * reports for that list.
              */
 994         if (args->wlist) {
 995                 if (args->count > clist_len(args->wlist)) {
 996                         error = EINVAL;
 997                         goto out;
 998                 }
 999                 rdma_used = 1;
1000         }
1001 
1002         /* use loaned buffers for TCP */
1003         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1004 
1005         if (is_system_labeled()) {
1006                 bslabel_t *clabel = req->rq_label;
1007 
1008                 ASSERT(clabel != NULL);
1009                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1010                     "got client label from request(1)", struct svc_req *, req);
1011 
1012                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1013                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1014                             exi)) {
1015                                 resp->status = NFS3ERR_ACCES;
1016                                 goto out1;
1017                         }
1018                 }
1019         }
1020 
             /*
              * Caller context passed to the VOP calls below.  With CC_DONTBLOCK
              * set, a conflict is expected to come back as EAGAIN with
              * CC_WOULDBLOCK in cc_flags (checked after VOP_RWLOCK() and
              * VOP_READ()) -- presumably instead of blocking; confirm.
              */
1021         ct.cc_sysid = 0;
1022         ct.cc_pid = 0;
1023         ct.cc_caller_id = nfs3_srv_caller_id;
1024         ct.cc_flags = CC_DONTBLOCK;
1025 
1026         /*
1027          * Enter the critical region before calling VOP_RWLOCK
1028          * to avoid a deadlock with write requests.
1029          */
1030         if (nbl_need_check(vp)) {
1031                 nbl_start_crit(vp, RW_READER);
1032                 in_crit = 1;
1033                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1034                     NULL)) {
1035                         error = EACCES;
1036                         goto out;
1037                 }
1038         }
1039 
1040         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1041 
1042         /* check if a monitor detected a delegation conflict */
1043         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1044                 resp->status = NFS3ERR_JUKEBOX;
1045                 goto out1;
1046         }
1047 
             /* From here on the out/out1 exit paths must drop the rwlock. */
1048         need_rwunlock = 1;
1049 
1050         va.va_mask = AT_ALL;
1051         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1052 
1053         /*
1054          * If we can't get the attributes, then we can't do the
1055          * right access checking.  So, we'll fail the request.
1056          */
1057         if (error)
1058                 goto out;
1059 
1060         vap = &va;
1061 
1062         if (vp->v_type != VREG) {
1063                 resp->status = NFS3ERR_INVAL;
1064                 goto out1;
1065         }
1066 
             /*
              * Callers other than the file's owner need VREAD access; VEXEC
              * access is accepted as a fallback.  If the VREAD check set
              * T_WOULDBLOCK the fallback is skipped and "out" converts the
              * failure to NFS3ERR_JUKEBOX.
              */
1067         if (crgetuid(cr) != va.va_uid) {
1068                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1069                 if (error) {
1070                         if (curthread->t_flag & T_WOULDBLOCK)
1071                                 goto out;
1072                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1073                         if (error)
1074                                 goto out;
1075                 }
1076         }
1077 
1078         if (MANDLOCK(vp, va.va_mode)) {
1079                 resp->status = NFS3ERR_ACCES;
1080                 goto out1;
1081         }
1082 
1083         offset = args->offset;
             /*
              * Read starts at or beyond end of file: reply OK with no data and
              * eof set.  The rwlock and critical region are released here
              * because the "done" path does not release them.
              */
1084         if (offset >= va.va_size) {
1085                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1086                 if (in_crit)
1087                         nbl_end_crit(vp);
1088                 resp->status = NFS3_OK;
1089                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1090                 resp->resok.count = 0;
1091                 resp->resok.eof = TRUE;
1092                 resp->resok.data.data_len = 0;
1093                 resp->resok.data.data_val = NULL;
1094                 resp->resok.data.mp = NULL;
1095                 /* RDMA */
1096                 resp->resok.wlist = args->wlist;
1097                 resp->resok.wlist_len = resp->resok.count;
1098                 if (resp->resok.wlist)
1099                         clist_zero_len(resp->resok.wlist);
1100                 goto done;
1101         }
1102 
             /*
              * Zero-length read: same empty OK reply as above, but eof is
              * reported as FALSE.
              */
1103         if (args->count == 0) {
1104                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1105                 if (in_crit)
1106                         nbl_end_crit(vp);
1107                 resp->status = NFS3_OK;
1108                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1109                 resp->resok.count = 0;
1110                 resp->resok.eof = FALSE;
1111                 resp->resok.data.data_len = 0;
1112                 resp->resok.data.data_val = NULL;
1113                 resp->resok.data.mp = NULL;
1114                 /* RDMA */
1115                 resp->resok.wlist = args->wlist;
1116                 resp->resok.wlist_len = resp->resok.count;
1117                 if (resp->resok.wlist)
1118                         clist_zero_len(resp->resok.wlist);
1119                 goto done;
1120         }
1121 
1122         /*
1123          * do not allocate more memory than the max. allowed
1124          * transfer size
1125          */
1126         if (args->count > rfs3_tsize(req))
1127                 args->count = rfs3_tsize(req);
1128 
             /*
              * Zero-copy attempt: set up an xuio and ask the file system for
              * loaned buffers.  If that fails, fall through to the copying
              * path below with loaned_buffers cleared.
              */
1129         if (loaned_buffers) {
1130                 uiop = (uio_t *)rfs_setup_xuio(vp);
1131                 ASSERT(uiop != NULL);
1132                 uiop->uio_segflg = UIO_SYSSPACE;
1133                 uiop->uio_loffset = args->offset;
1134                 uiop->uio_resid = args->count;
1135 
1136                 /* Jump to do the read if successful */
1137                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1138                         /*
1139                          * Need to hold the vnode until after VOP_RETZCBUF()
1140                          * is called.
1141                          */
1142                         VN_HOLD(vp);
1143                         goto doio_read;
1144                 }
1145 
1146                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1147                     uiop->uio_loffset, int, uiop->uio_resid);
1148 
1149                 uiop->uio_extflg = 0;
1150                 /* failure to setup for zero copy */
1151                 rfs_free_xuio((void *)uiop);
1152                 loaned_buffers = 0;
1153         }
1154 
1155         /*
1156          * If returning data via RDMA Write, then grab the chunk list.
1157          * If we aren't returning READ data w/RDMA_WRITE, then grab
1158          * a mblk.
1159          */
1160         if (rdma_used) {
1161                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1162                 uio.uio_iov = &iov;
1163                 uio.uio_iovcnt = 1;
1164         } else {
1165                 /*
1166                  * mp will contain the data to be sent out in the read reply.
1167                  * For UDP, this will be freed after the reply has been sent
1168                  * out by the driver.  For TCP, it will be freed after the last
1169                  * segment associated with the reply has been ACKed by the
1170                  * client.
1171                  */
1172                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1173                 uio.uio_iov = iovp;
1174                 uio.uio_iovcnt = iovcnt;
1175         }
1176 
             /* Common uio setup for the RDMA-chunk and mblk paths. */
1177         uio.uio_segflg = UIO_SYSSPACE;
1178         uio.uio_extflg = UIO_COPY_CACHED;
1179         uio.uio_loffset = args->offset;
1180         uio.uio_resid = args->count;
1181         uiop = &uio;
1182 
1183 doio_read:
             /* uiop is either the loaned-buffer xuio or the local uio. */
1184         error = VOP_READ(vp, uiop, 0, cr, &ct);
1185 
1186         if (error) {
                     /* Only the rfs_read_alloc() path has an mp at this point. */
1187                 if (mp)
1188                         freemsg(mp);
1189                 /* check if a monitor detected a delegation conflict */
1190                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1191                         resp->status = NFS3ERR_JUKEBOX;
1192                         goto out1;
1193                 }
1194                 goto out;
1195         }
1196 
1197         /* make mblk using zc buffers */
1198         if (loaned_buffers) {
1199                 mp = uio_to_mblk(uiop);
1200                 ASSERT(mp != NULL);
1201         }
1202 
             /* Fetch fresh attributes for the reply; failure is not fatal. */
1203         va.va_mask = AT_ALL;
1204         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1205 
1206         if (error)
1207                 vap = NULL;
1208         else
1209                 vap = &va;
1210 
1211         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1212 
1213         if (in_crit)
1214                 nbl_end_crit(vp);
1215 
1216         resp->status = NFS3_OK;
1217         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
             /* Bytes actually read = bytes requested - bytes left in the uio. */
1218         resp->resok.count = args->count - uiop->uio_resid;
             /* eof only if attributes are valid and the read ended at va_size. */
1219         if (!error && offset + resp->resok.count == va.va_size)
1220                 resp->resok.eof = TRUE;
1221         else
1222                 resp->resok.eof = FALSE;
1223         resp->resok.data.data_len = resp->resok.count;
1224 
1225         if (mp)
1226                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1227 
1228         resp->resok.data.mp = mp;
1229         resp->resok.size = (uint_t)args->count;
1230 
1231         if (rdma_used) {
1232                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1233                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1234                         resp->status = NFS3ERR_INVAL;
1235                 }
1236         } else {
                     /*
                      * NOTE(review): mp is dereferenced unconditionally; this
                      * relies on rfs_read_alloc() / uio_to_mblk() never leaving
                      * it NULL on the non-RDMA paths -- confirm.
                      */
1237                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1238                 (resp->resok).wlist = NULL;
1239         }
1240 
1241 done:
             /* Success exit: rwlock and critical region were already dropped. */
1242         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1243             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1244             READ3res *, resp);
1245 
1246         VN_RELE(vp);
1247 
             /* iovp is only non-NULL when rfs_read_alloc() supplied the iovecs. */
1248         if (iovp != NULL)
1249                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1250 
1251         return;
1252 
1253 out:
             /*
              * Error exit: map `error' to an NFS3 status, or report
              * NFS3ERR_JUKEBOX (and clear the flag) if T_WOULDBLOCK is set.
              */
1254         if (curthread->t_flag & T_WOULDBLOCK) {
1255                 curthread->t_flag &= ~T_WOULDBLOCK;
1256                 resp->status = NFS3ERR_JUKEBOX;
1257         } else
1258                 resp->status = puterrno3(error);
1259 out1:
             /* Status already set: release whatever is still held and reply. */
1260         DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1261             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1262             READ3res *, resp);
1263 
1264         if (vp != NULL) {
1265                 if (need_rwunlock)
1266                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1267                 if (in_crit)
1268                         nbl_end_crit(vp);
1269                 VN_RELE(vp);
1270         }
1271         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1272 
1273         if (iovp != NULL)
1274                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1275 }
1276 
     /*
      * Release the data attached to a READ3 reply.
      *
      * Only replies with status NFS3_OK are examined; if resok.data.mp is
      * set it is handed to freemsg().  Failed replies are left untouched.
      */
1277 void
1278 rfs3_read_free(READ3res *resp)
1279 {
1280         mblk_t *mp;
1281 
1282         if (resp->status == NFS3_OK) {
1283                 mp = resp->resok.data.mp;
1284                 if (mp != NULL)
1285                         freemsg(mp);
1286         }
1287 }
1288 
     /*
      * Return the address of the file member of the READ3 arguments, the
      * same member rfs3_read() passes to nfs3_fhtovp().
      */
1289 void *
1290 rfs3_read_getfh(READ3args *args)
1291 {
1292 
1293         return (&args->file);
1294 }
1295 
     /*
      * Size of the on-stack iovec array in rfs3_write().  Requests whose mblk
      * chain has more entries than this get a kmem_alloc()ed array instead.
      */
1296 #define MAX_IOVECS      12
1297 
     /*
      * DEBUG-only counters bumped by rfs3_write(): "hits" when the mblk chain
      * fit in the on-stack array, "misses" when an array had to be allocated.
      */
1298 #ifdef DEBUG
1299 static int rfs3_write_hits = 0;
1300 static int rfs3_write_misses = 0;
1301 #endif
1302 
     /*
      * Server routine to handle write.
      *
      * The file handle in args->file is translated to a vnode and args->count
      * bytes are written at args->offset.  The data comes from one of:
      *   - the mblk chain in args->mblk,
      *   - the address in args->rlist (u.c_daddr3), or
      *   - the flat buffer args->data.data_val.
      * args->stable selects the VOP_WRITE() ioflag (UNSTABLE -> 0,
      * FILE_SYNC -> FSYNC, DATA_SYNC -> FDSYNC); any other value is rejected
      * with NFS3ERR_INVAL.
      *
      * The result is stored in *resp.  Exit paths: "err" converts `error' to
      * an NFS3 status, "err1" is used when resp->status is already set (both
      * fill in resfail.file_wcc), and "out" performs the common cleanup that
      * successful replies also go through.
      */
1303 void
1304 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1305     struct svc_req *req, cred_t *cr, bool_t ro)
1306 {
1307         nfs3_srv_t *ns;
1308         int error;
1309         vnode_t *vp;
1310         struct vattr *bvap = NULL;
1311         struct vattr bva;
1312         struct vattr *avap = NULL;
1313         struct vattr ava;
1314         u_offset_t rlimit;
1315         struct uio uio;
1316         struct iovec iov[MAX_IOVECS];
1317         mblk_t *m;
1318         struct iovec *iovp;
1319         int iovcnt;
1320         int ioflag;
1321         cred_t *savecred;
1322         int in_crit = 0;
1323         int rwlock_ret = -1;
1324         caller_context_t ct;
1325 
1326         vp = nfs3_fhtovp(&args->file, exi);
1327 
1328         DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1329             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1330             WRITE3args *, args);
1331 
1332         if (vp == NULL) {
1333                 error = ESTALE;
1334                 goto err;
1335         }
1336 
1337         ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
             /* ns supplies the write verifier returned in successful replies. */
1338         ns = nfs3_get_srv();
1339 
1340         if (is_system_labeled()) {
1341                 bslabel_t *clabel = req->rq_label;
1342 
1343                 ASSERT(clabel != NULL);
1344                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1345                     "got client label from request(1)", struct svc_req *, req);
1346 
1347                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1348                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1349                             exi)) {
1350                                 resp->status = NFS3ERR_ACCES;
1351                                 goto err1;
1352                         }
1353                 }
1354         }
1355 
             /*
              * Caller context passed to the VOP calls below.  With CC_DONTBLOCK
              * set, a conflict is expected to come back as EAGAIN with
              * CC_WOULDBLOCK in cc_flags (checked after VOP_RWLOCK() and
              * VOP_WRITE()) -- presumably instead of blocking; confirm.
              */
1356         ct.cc_sysid = 0;
1357         ct.cc_pid = 0;
1358         ct.cc_caller_id = nfs3_srv_caller_id;
1359         ct.cc_flags = CC_DONTBLOCK;
1360 
1361         /*
1362          * We have to enter the critical region before calling VOP_RWLOCK
1363          * to avoid a deadlock with ufs.
1364          */
1365         if (nbl_need_check(vp)) {
1366                 nbl_start_crit(vp, RW_READER);
1367                 in_crit = 1;
1368                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1369                     NULL)) {
1370                         error = EACCES;
1371                         goto err;
1372                 }
1373         }
1374 
1375         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1376 
1377         /* check if a monitor detected a delegation conflict */
1378         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1379                 resp->status = NFS3ERR_JUKEBOX;
                     /* Keep the exit path from calling VOP_RWUNLOCK(). */
1380                 rwlock_ret = -1;
1381                 goto err1;
1382         }
1383 
1384 
1385         bva.va_mask = AT_ALL;
1386         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1387 
1388         /*
1389          * If we can't get the attributes, then we can't do the
1390          * right access checking.  So, we'll fail the request.
1391          */
1392         if (error)
1393                 goto err;
1394 
             /*
              * Until the write has been attempted, the "after" attributes are
              * the same as the "before" ones.
              */
1395         bvap = &bva;
1396         avap = bvap;
1397 
1398         if (args->count != args->data.data_len) {
1399                 resp->status = NFS3ERR_INVAL;
1400                 goto err1;
1401         }
1402 
1403         if (rdonly(ro, vp)) {
1404                 resp->status = NFS3ERR_ROFS;
1405                 goto err1;
1406         }
1407 
1408         if (vp->v_type != VREG) {
1409                 resp->status = NFS3ERR_INVAL;
1410                 goto err1;
1411         }
1412 
             /* Callers other than the file's owner need VWRITE access. */
1413         if (crgetuid(cr) != bva.va_uid &&
1414             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1415                 goto err;
1416 
1417         if (MANDLOCK(vp, bva.va_mode)) {
1418                 resp->status = NFS3ERR_ACCES;
1419                 goto err1;
1420         }
1421 
             /* Zero-length write: reply OK with unchanged attributes. */
1422         if (args->count == 0) {
1423                 resp->status = NFS3_OK;
1424                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1425                 resp->resok.count = 0;
1426                 resp->resok.committed = args->stable;
1427                 resp->resok.verf = ns->write3verf;
1428                 goto out;
1429         }
1430 
             /*
              * Build the iovec array describing the data to write.  For an
              * mblk chain there is one iovec per mblk; the on-stack array is
              * used when the chain is short enough, otherwise an array is
              * allocated here and freed once it is no longer needed.
              */
1431         if (args->mblk != NULL) {
1432                 iovcnt = 0;
1433                 for (m = args->mblk; m != NULL; m = m->b_cont)
1434                         iovcnt++;
1435                 if (iovcnt <= MAX_IOVECS) {
1436 #ifdef DEBUG
1437                         rfs3_write_hits++;
1438 #endif
1439                         iovp = iov;
1440                 } else {
1441 #ifdef DEBUG
1442                         rfs3_write_misses++;
1443 #endif
1444                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1445                 }
1446                 mblk_to_iov(args->mblk, iovcnt, iovp);
1447 
1448         } else if (args->rlist != NULL) {
1449                 iovcnt = 1;
1450                 iovp = iov;
1451                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1452                 iovp->iov_len = args->count;
1453         } else {
1454                 iovcnt = 1;
1455                 iovp = iov;
1456                 iovp->iov_base = args->data.data_val;
1457                 iovp->iov_len = args->count;
1458         }
1459 
1460         uio.uio_iov = iovp;
1461         uio.uio_iovcnt = iovcnt;
1462 
1463         uio.uio_segflg = UIO_SYSSPACE;
1464         uio.uio_extflg = UIO_COPY_DEFAULT;
1465         uio.uio_loffset = args->offset;
1466         uio.uio_resid = args->count;
             /*
              * Apply the process file size limit: trim the request so that it
              * does not extend past uio_llimit.
              * NOTE(review): if args->offset is already beyond uio_llimit the
              * subtraction below presumably wraps (u_offset_t looks unsigned)
              * and uio_resid is left untrimmed, leaving VOP_WRITE() to enforce
              * the limit -- confirm.
              */
1467         uio.uio_llimit = curproc->p_fsz_ctl;
1468         rlimit = uio.uio_llimit - args->offset;
1469         if (rlimit < (u_offset_t)uio.uio_resid)
1470                 uio.uio_resid = (int)rlimit;
1471 
1472         if (args->stable == UNSTABLE)
1473                 ioflag = 0;
1474         else if (args->stable == FILE_SYNC)
1475                 ioflag = FSYNC;
1476         else if (args->stable == DATA_SYNC)
1477                 ioflag = FDSYNC;
1478         else {
                     /* Unknown stable value: undo any iovec allocation and fail. */
1479                 if (iovp != iov)
1480                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1481                 resp->status = NFS3ERR_INVAL;
1482                 goto err1;
1483         }
1484 
1485         /*
1486          * We're changing creds because VM may fault and we need
1487          * the cred of the current thread to be used if quota
1488          * checking is enabled.
1489          */
1490         savecred = curthread->t_cred;
1491         curthread->t_cred = cr;
1492         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1493         curthread->t_cred = savecred;
1494 
1495         if (iovp != iov)
1496                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1497 
1498         /* check if a monitor detected a delegation conflict */
1499         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1500                 resp->status = NFS3ERR_JUKEBOX;
1501                 goto err1;
1502         }
1503 
             /*
              * Collect the post-write attributes before looking at `error', so
              * that a failure reply built at "err" can carry them too.
              */
1504         ava.va_mask = AT_ALL;
1505         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1506 
1507         if (error)
1508                 goto err;
1509 
1510         /*
1511          * If we were unable to get the V_WRITELOCK_TRUE, then we
1512          * may not have accurate after attrs, so check if
1513          * we have both attributes, they have a non-zero va_seq, and
1514          * va_seq has changed by exactly one,
1515          * if not, turn off the before attr.
1516          */
1517         if (rwlock_ret != V_WRITELOCK_TRUE) {
1518                 if (bvap == NULL || avap == NULL ||
1519                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1520                     avap->va_seq != (bvap->va_seq + 1)) {
1521                         bvap = NULL;
1522                 }
1523         }
1524 
1525         resp->status = NFS3_OK;
1526         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
             /* Bytes written = bytes requested - bytes left in the uio. */
1527         resp->resok.count = args->count - uio.uio_resid;
1528         resp->resok.committed = args->stable;
1529         resp->resok.verf = ns->write3verf;
1530         goto out;
1531 
1532 err:
             /*
              * Error exit: map `error' to an NFS3 status, or report
              * NFS3ERR_JUKEBOX (and clear the flag) if T_WOULDBLOCK is set.
              */
1533         if (curthread->t_flag & T_WOULDBLOCK) {
1534                 curthread->t_flag &= ~T_WOULDBLOCK;
1535                 resp->status = NFS3ERR_JUKEBOX;
1536         } else
1537                 resp->status = puterrno3(error);
1538 err1:
             /* Status already set: only the failure wcc data is filled in. */
1539         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1540 out:
             /* Common cleanup for both successful and failed replies. */
1541         DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1542             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1543             WRITE3res *, resp);
1544 
1545         if (vp != NULL) {
                     /*
                      * rwlock_ret is still -1 if VOP_RWLOCK() was never called
                      * or the attempt hit a delegation conflict.
                      */
1546                 if (rwlock_ret != -1)
1547                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1548                 if (in_crit)
1549                         nbl_end_crit(vp);
1550                 VN_RELE(vp);
1551         }
1552 }
1553 
     /*
      * Return the address of the file member of the WRITE3 arguments, the
      * same member rfs3_write() passes to nfs3_fhtovp().
      */
1554 void *
1555 rfs3_write_getfh(WRITE3args *args)
1556 {
1557 
1558         return (&args->file);
1559 }
1560 
1561 void
1562 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1563     struct svc_req *req, cred_t *cr, bool_t ro)
1564 {
1565         int error;
1566         int in_crit = 0;
1567         vnode_t *vp;
1568         vnode_t *tvp = NULL;
1569         vnode_t *dvp;
1570         struct vattr *vap;
1571         struct vattr va;
1572         struct vattr *dbvap;
1573         struct vattr dbva;
1574         struct vattr *davap;
1575         struct vattr dava;
1576         enum vcexcl excl;
1577         nfstime3 *mtime;
1578         len_t reqsize;
1579         bool_t trunc;
1580         struct sockaddr *ca;
1581         char *name = NULL;
1582 
1583         dbvap = NULL;
1584         davap = NULL;
1585 
1586         dvp = nfs3_fhtovp(&args->where.dir, exi);
1587 
1588         DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1589             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1590             CREATE3args *, args);
1591 
1592         if (dvp == NULL) {
1593                 error = ESTALE;
1594                 goto out;
1595         }
1596 
1597         dbva.va_mask = AT_ALL;
1598         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1599         davap = dbvap;
1600 
1601         if (args->where.name == nfs3nametoolong) {
1602                 resp->status = NFS3ERR_NAMETOOLONG;
1603                 goto out1;
1604         }
1605 
1606         if (args->where.name == NULL || *(args->where.name) == '\0') {
1607                 resp->status = NFS3ERR_ACCES;
1608                 goto out1;
1609         }
1610 
1611         if (rdonly(ro, dvp)) {
1612                 resp->status = NFS3ERR_ROFS;
1613                 goto out1;
1614         }
1615 
1616         if (is_system_labeled()) {
1617                 bslabel_t *clabel = req->rq_label;
1618 
1619                 ASSERT(clabel != NULL);
1620                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1621                     "got client label from request(1)", struct svc_req *, req);
1622 
1623                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1624                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1625                             exi)) {
1626                                 resp->status = NFS3ERR_ACCES;
1627                                 goto out1;
1628                         }
1629                 }
1630         }
1631 
1632         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1633         name = nfscmd_convname(ca, exi, args->where.name,
1634             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1635 
1636         if (name == NULL) {
1637                 /* This is really a Solaris EILSEQ */
1638                 resp->status = NFS3ERR_INVAL;
1639                 goto out1;
1640         }
1641 
1642         if (args->how.mode == EXCLUSIVE) {
1643                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1644                 va.va_type = VREG;
1645                 va.va_mode = (mode_t)0;
1646                 /*
1647                  * Ensure no time overflows and that types match
1648                  */
1649                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1650                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1651                 va.va_mtime.tv_nsec = mtime->nseconds;
1652                 excl = EXCL;
1653         } else {
1654                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1655                     &va);
1656                 if (error)
1657                         goto out;
1658                 va.va_mask |= AT_TYPE;
1659                 va.va_type = VREG;
1660                 if (args->how.mode == GUARDED)
1661                         excl = EXCL;
1662                 else {
1663                         excl = NONEXCL;
1664 
1665                         /*
1666                          * During creation of file in non-exclusive mode
1667                          * if size of file is being set then make sure
1668                          * that if the file already exists that no conflicting
1669                          * non-blocking mandatory locks exists in the region
1670                          * being modified. If there are conflicting locks fail
1671                          * the operation with EACCES.
1672                          */
1673                         if (va.va_mask & AT_SIZE) {
1674                                 struct vattr tva;
1675 
1676                                 /*
1677                                  * Does file already exist?
1678                                  */
1679                                 error = VOP_LOOKUP(dvp, name, &tvp,
1680                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1681 
1682                                 /*
1683                                  * Check to see if the file has been delegated
1684                                  * to a v4 client.  If so, then begin recall of
1685                                  * the delegation and return JUKEBOX to allow
1686                                  * the client to retransmit its request.
1687                                  */
1688 
1689                                 trunc = va.va_size == 0;
1690                                 if (!error &&
1691                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1692                                         resp->status = NFS3ERR_JUKEBOX;
1693                                         goto out1;
1694                                 }
1695 
1696                                 /*
1697                                  * Check for NBMAND lock conflicts
1698                                  */
1699                                 if (!error && nbl_need_check(tvp)) {
1700                                         u_offset_t offset;
1701                                         ssize_t len;
1702 
1703                                         nbl_start_crit(tvp, RW_READER);
1704                                         in_crit = 1;
1705 
1706                                         tva.va_mask = AT_SIZE;
1707                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1708                                             NULL);
1709                                         /*
1710                                          * Can't check for conflicts, so return
1711                                          * error.
1712                                          */
1713                                         if (error)
1714                                                 goto out;
1715 
1716                                         offset = tva.va_size < va.va_size ?
1717                                             tva.va_size : va.va_size;
1718                                         len = tva.va_size < va.va_size ?
1719                                             va.va_size - tva.va_size :
1720                                             tva.va_size - va.va_size;
1721                                         if (nbl_conflict(tvp, NBL_WRITE,
1722                                             offset, len, 0, NULL)) {
1723                                                 error = EACCES;
1724                                                 goto out;
1725                                         }
1726                                 } else if (tvp) {
1727                                         VN_RELE(tvp);
1728                                         tvp = NULL;
1729                                 }
1730                         }
1731                 }
1732                 if (va.va_mask & AT_SIZE)
1733                         reqsize = va.va_size;
1734         }
1735 
1736         /*
1737          * Must specify the mode.
1738          */
1739         if (!(va.va_mask & AT_MODE)) {
1740                 resp->status = NFS3ERR_INVAL;
1741                 goto out1;
1742         }
1743 
1744         /*
1745          * If the filesystem is exported with nosuid, then mask off
1746          * the setuid and setgid bits.
1747          */
1748         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1749                 va.va_mode &= ~(VSUID | VSGID);
1750 
1751 tryagain:
1752         /*
1753          * The file open mode used is VWRITE.  If the client needs
1754          * some other semantic, then it should do the access checking
1755          * itself.  It would have been nice to have the file open mode
1756          * passed as part of the arguments.
1757          */
1758         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1759             &vp, cr, 0, NULL, NULL);
1760 
1761         dava.va_mask = AT_ALL;
1762         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1763 
1764         if (error) {
1765                 /*
1766                  * If we got something other than file already exists
1767                  * then just return this error.  Otherwise, we got
1768                  * EEXIST.  If we were doing a GUARDED create, then
1769                  * just return this error.  Otherwise, we need to
1770                  * make sure that this wasn't a duplicate of an
1771                  * exclusive create request.
1772                  *
1773                  * The assumption is made that a non-exclusive create
1774                  * request will never return EEXIST.
1775                  */
1776                 if (error != EEXIST || args->how.mode == GUARDED)
1777                         goto out;
1778                 /*
1779                  * Lookup the file so that we can get a vnode for it.
1780                  */
1781                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1782                     NULL, cr, NULL, NULL, NULL);
1783                 if (error) {
1784                         /*
1785                          * We couldn't find the file that we thought that
1786                          * we just created.  So, we'll just try creating
1787                          * it again.
1788                          */
1789                         if (error == ENOENT)
1790                                 goto tryagain;
1791                         goto out;
1792                 }
1793 
1794                 /*
1795                  * If the file is delegated to a v4 client, go ahead
1796                  * and initiate recall, this create is a hint that a
1797                  * conflicting v3 open has occurred.
1798                  */
1799 
1800                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1801                         VN_RELE(vp);
1802                         resp->status = NFS3ERR_JUKEBOX;
1803                         goto out1;
1804                 }
1805 
1806                 va.va_mask = AT_ALL;
1807                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1808 
1809                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1810                 /* % with INT32_MAX to prevent overflows */
1811                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1812                     vap->va_mtime.tv_sec !=
1813                     (mtime->seconds % INT32_MAX) ||
1814                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1815                         VN_RELE(vp);
1816                         error = EEXIST;
1817                         goto out;
1818                 }
1819         } else {
1820 
1821                 if ((args->how.mode == UNCHECKED ||
1822                     args->how.mode == GUARDED) &&
1823                     args->how.createhow3_u.obj_attributes.size.set_it &&
1824                     va.va_size == 0)
1825                         trunc = TRUE;
1826                 else
1827                         trunc = FALSE;
1828 
1829                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1830                         VN_RELE(vp);
1831                         resp->status = NFS3ERR_JUKEBOX;
1832                         goto out1;
1833                 }
1834 
1835                 va.va_mask = AT_ALL;
1836                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1837 
1838                 /*
1839                  * We need to check to make sure that the file got
1840                  * created to the indicated size.  If not, we do a
1841                  * setattr to try to change the size, but we don't
                 * try too hard.  This shouldn't be a problem as most
                 * clients will only specify a size of zero which
1844                  * local file systems handle.  However, even if
1845                  * the client does specify a non-zero size, it can
1846                  * still recover by checking the size of the file
1847                  * after it has created it and then issue a setattr
1848                  * request of its own to set the size of the file.
1849                  */
1850                 if (vap != NULL &&
1851                     (args->how.mode == UNCHECKED ||
1852                     args->how.mode == GUARDED) &&
1853                     args->how.createhow3_u.obj_attributes.size.set_it &&
1854                     vap->va_size != reqsize) {
1855                         va.va_mask = AT_SIZE;
1856                         va.va_size = reqsize;
1857                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1858                         va.va_mask = AT_ALL;
1859                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1860                 }
1861         }
1862 
1863         if (name != args->where.name)
1864                 kmem_free(name, MAXPATHLEN + 1);
1865 
1866         error = makefh3(&resp->resok.obj.handle, vp, exi);
1867         if (error)
1868                 resp->resok.obj.handle_follows = FALSE;
1869         else
1870                 resp->resok.obj.handle_follows = TRUE;
1871 
1872         /*
1873          * Force modified data and metadata out to stable storage.
1874          */
1875         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1876         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1877 
1878         VN_RELE(vp);
1879         if (tvp != NULL) {
1880                 if (in_crit)
1881                         nbl_end_crit(tvp);
1882                 VN_RELE(tvp);
1883         }
1884 
1885         resp->status = NFS3_OK;
1886         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1887         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1888 
1889         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1890             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1891             CREATE3res *, resp);
1892 
1893         VN_RELE(dvp);
1894         return;
1895 
1896 out:
1897         if (curthread->t_flag & T_WOULDBLOCK) {
1898                 curthread->t_flag &= ~T_WOULDBLOCK;
1899                 resp->status = NFS3ERR_JUKEBOX;
1900         } else
1901                 resp->status = puterrno3(error);
1902 out1:
1903         DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1904             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1905             CREATE3res *, resp);
1906 
1907         if (name != NULL && name != args->where.name)
1908                 kmem_free(name, MAXPATHLEN + 1);
1909 
1910         if (tvp != NULL) {
1911                 if (in_crit)
1912                         nbl_end_crit(tvp);
1913                 VN_RELE(tvp);
1914         }
1915         if (dvp != NULL)
1916                 VN_RELE(dvp);
1917         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1918 }
1919 
1920 void *
1921 rfs3_create_getfh(CREATE3args *args)
1922 {
1923 
1924         return (&args->where.dir);
1925 }
1926 
1927 void
1928 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1929     struct svc_req *req, cred_t *cr, bool_t ro)
1930 {
1931         int error;
1932         vnode_t *vp = NULL;
1933         vnode_t *dvp;
1934         struct vattr *vap;
1935         struct vattr va;
1936         struct vattr *dbvap;
1937         struct vattr dbva;
1938         struct vattr *davap;
1939         struct vattr dava;
1940         struct sockaddr *ca;
1941         char *name = NULL;
1942 
1943         dbvap = NULL;
1944         davap = NULL;
1945 
1946         dvp = nfs3_fhtovp(&args->where.dir, exi);
1947 
1948         DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1949             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1950             MKDIR3args *, args);
1951 
1952         if (dvp == NULL) {
1953                 error = ESTALE;
1954                 goto out;
1955         }
1956 
1957         dbva.va_mask = AT_ALL;
1958         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1959         davap = dbvap;
1960 
1961         if (args->where.name == nfs3nametoolong) {
1962                 resp->status = NFS3ERR_NAMETOOLONG;
1963                 goto out1;
1964         }
1965 
1966         if (args->where.name == NULL || *(args->where.name) == '\0') {
1967                 resp->status = NFS3ERR_ACCES;
1968                 goto out1;
1969         }
1970 
1971         if (rdonly(ro, dvp)) {
1972                 resp->status = NFS3ERR_ROFS;
1973                 goto out1;
1974         }
1975 
1976         if (is_system_labeled()) {
1977                 bslabel_t *clabel = req->rq_label;
1978 
1979                 ASSERT(clabel != NULL);
1980                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1981                     "got client label from request(1)", struct svc_req *, req);
1982 
1983                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1984                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1985                             exi)) {
1986                                 resp->status = NFS3ERR_ACCES;
1987                                 goto out1;
1988                         }
1989                 }
1990         }
1991 
1992         error = sattr3_to_vattr(&args->attributes, &va);
1993         if (error)
1994                 goto out;
1995 
1996         if (!(va.va_mask & AT_MODE)) {
1997                 resp->status = NFS3ERR_INVAL;
1998                 goto out1;
1999         }
2000 
2001         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2002         name = nfscmd_convname(ca, exi, args->where.name,
2003             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2004 
2005         if (name == NULL) {
2006                 resp->status = NFS3ERR_INVAL;
2007                 goto out1;
2008         }
2009 
2010         va.va_mask |= AT_TYPE;
2011         va.va_type = VDIR;
2012 
2013         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2014 
2015         if (name != args->where.name)
2016                 kmem_free(name, MAXPATHLEN + 1);
2017 
2018         dava.va_mask = AT_ALL;
2019         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2020 
2021         /*
2022          * Force modified data and metadata out to stable storage.
2023          */
2024         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2025 
2026         if (error)
2027                 goto out;
2028 
2029         error = makefh3(&resp->resok.obj.handle, vp, exi);
2030         if (error)
2031                 resp->resok.obj.handle_follows = FALSE;
2032         else
2033                 resp->resok.obj.handle_follows = TRUE;
2034 
2035         va.va_mask = AT_ALL;
2036         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2037 
2038         /*
2039          * Force modified data and metadata out to stable storage.
2040          */
2041         (void) VOP_FSYNC(vp, 0, cr, NULL);
2042 
2043         VN_RELE(vp);
2044 
2045         resp->status = NFS3_OK;
2046         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2047         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2048 
2049         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2050             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2051             MKDIR3res *, resp);
2052         VN_RELE(dvp);
2053 
2054         return;
2055 
2056 out:
2057         if (curthread->t_flag & T_WOULDBLOCK) {
2058                 curthread->t_flag &= ~T_WOULDBLOCK;
2059                 resp->status = NFS3ERR_JUKEBOX;
2060         } else
2061                 resp->status = puterrno3(error);
2062 out1:
2063         DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2064             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2065             MKDIR3res *, resp);
2066         if (dvp != NULL)
2067                 VN_RELE(dvp);
2068         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2069 }
2070 
2071 void *
2072 rfs3_mkdir_getfh(MKDIR3args *args)
2073 {
2074 
2075         return (&args->where.dir);
2076 }
2077 
2078 void
2079 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2080     struct svc_req *req, cred_t *cr, bool_t ro)
2081 {
2082         int error;
2083         vnode_t *vp;
2084         vnode_t *dvp;
2085         struct vattr *vap;
2086         struct vattr va;
2087         struct vattr *dbvap;
2088         struct vattr dbva;
2089         struct vattr *davap;
2090         struct vattr dava;
2091         struct sockaddr *ca;
2092         char *name = NULL;
2093         char *symdata = NULL;
2094 
2095         dbvap = NULL;
2096         davap = NULL;
2097 
2098         dvp = nfs3_fhtovp(&args->where.dir, exi);
2099 
2100         DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2101             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2102             SYMLINK3args *, args);
2103 
2104         if (dvp == NULL) {
2105                 error = ESTALE;
2106                 goto err;
2107         }
2108 
2109         dbva.va_mask = AT_ALL;
2110         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2111         davap = dbvap;
2112 
2113         if (args->where.name == nfs3nametoolong) {
2114                 resp->status = NFS3ERR_NAMETOOLONG;
2115                 goto err1;
2116         }
2117 
2118         if (args->where.name == NULL || *(args->where.name) == '\0') {
2119                 resp->status = NFS3ERR_ACCES;
2120                 goto err1;
2121         }
2122 
2123         if (rdonly(ro, dvp)) {
2124                 resp->status = NFS3ERR_ROFS;
2125                 goto err1;
2126         }
2127 
2128         if (is_system_labeled()) {
2129                 bslabel_t *clabel = req->rq_label;
2130 
2131                 ASSERT(clabel != NULL);
2132                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2133                     "got client label from request(1)", struct svc_req *, req);
2134 
2135                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2136                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2137                             exi)) {
2138                                 resp->status = NFS3ERR_ACCES;
2139                                 goto err1;
2140                         }
2141                 }
2142         }
2143 
2144         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2145         if (error)
2146                 goto err;
2147 
2148         if (!(va.va_mask & AT_MODE)) {
2149                 resp->status = NFS3ERR_INVAL;
2150                 goto err1;
2151         }
2152 
2153         if (args->symlink.symlink_data == nfs3nametoolong) {
2154                 resp->status = NFS3ERR_NAMETOOLONG;
2155                 goto err1;
2156         }
2157 
2158         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2159         name = nfscmd_convname(ca, exi, args->where.name,
2160             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2161 
2162         if (name == NULL) {
2163                 /* This is really a Solaris EILSEQ */
2164                 resp->status = NFS3ERR_INVAL;
2165                 goto err1;
2166         }
2167 
2168         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2169             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2170         if (symdata == NULL) {
2171                 /* This is really a Solaris EILSEQ */
2172                 resp->status = NFS3ERR_INVAL;
2173                 goto err1;
2174         }
2175 
2176 
2177         va.va_mask |= AT_TYPE;
2178         va.va_type = VLNK;
2179 
2180         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2181 
2182         dava.va_mask = AT_ALL;
2183         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2184 
2185         if (error)
2186                 goto err;
2187 
2188         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2189             NULL, NULL, NULL);
2190 
2191         /*
2192          * Force modified data and metadata out to stable storage.
2193          */
2194         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2195 
2196 
2197         resp->status = NFS3_OK;
2198         if (error) {
2199                 resp->resok.obj.handle_follows = FALSE;
2200                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2201                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2202                 goto out;
2203         }
2204 
2205         error = makefh3(&resp->resok.obj.handle, vp, exi);
2206         if (error)
2207                 resp->resok.obj.handle_follows = FALSE;
2208         else
2209                 resp->resok.obj.handle_follows = TRUE;
2210 
2211         va.va_mask = AT_ALL;
2212         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2213 
2214         /*
2215          * Force modified data and metadata out to stable storage.
2216          */
2217         (void) VOP_FSYNC(vp, 0, cr, NULL);
2218 
2219         VN_RELE(vp);
2220 
2221         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2222         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2223         goto out;
2224 
2225 err:
2226         if (curthread->t_flag & T_WOULDBLOCK) {
2227                 curthread->t_flag &= ~T_WOULDBLOCK;
2228                 resp->status = NFS3ERR_JUKEBOX;
2229         } else
2230                 resp->status = puterrno3(error);
2231 err1:
2232         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2233 out:
2234         if (name != NULL && name != args->where.name)
2235                 kmem_free(name, MAXPATHLEN + 1);
2236         if (symdata != NULL && symdata != args->symlink.symlink_data)
2237                 kmem_free(symdata, MAXPATHLEN + 1);
2238 
2239         DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2240             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2241             SYMLINK3res *, resp);
2242 
2243         if (dvp != NULL)
2244                 VN_RELE(dvp);
2245 }
2246 
2247 void *
2248 rfs3_symlink_getfh(SYMLINK3args *args)
2249 {
2250 
2251         return (&args->where.dir);
2252 }
2253 
2254 void
2255 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2256     struct svc_req *req, cred_t *cr, bool_t ro)
2257 {
2258         int error;
2259         vnode_t *vp;
2260         vnode_t *realvp;
2261         vnode_t *dvp;
2262         struct vattr *vap;
2263         struct vattr va;
2264         struct vattr *dbvap;
2265         struct vattr dbva;
2266         struct vattr *davap;
2267         struct vattr dava;
2268         int mode;
2269         enum vcexcl excl;
2270         struct sockaddr *ca;
2271         char *name = NULL;
2272 
2273         dbvap = NULL;
2274         davap = NULL;
2275 
2276         dvp = nfs3_fhtovp(&args->where.dir, exi);
2277 
2278         DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2279             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2280             MKNOD3args *, args);
2281 
2282         if (dvp == NULL) {
2283                 error = ESTALE;
2284                 goto out;
2285         }
2286 
2287         dbva.va_mask = AT_ALL;
2288         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2289         davap = dbvap;
2290 
2291         if (args->where.name == nfs3nametoolong) {
2292                 resp->status = NFS3ERR_NAMETOOLONG;
2293                 goto out1;
2294         }
2295 
2296         if (args->where.name == NULL || *(args->where.name) == '\0') {
2297                 resp->status = NFS3ERR_ACCES;
2298                 goto out1;
2299         }
2300 
2301         if (rdonly(ro, dvp)) {
2302                 resp->status = NFS3ERR_ROFS;
2303                 goto out1;
2304         }
2305 
2306         if (is_system_labeled()) {
2307                 bslabel_t *clabel = req->rq_label;
2308 
2309                 ASSERT(clabel != NULL);
2310                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2311                     "got client label from request(1)", struct svc_req *, req);
2312 
2313                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2314                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2315                             exi)) {
2316                                 resp->status = NFS3ERR_ACCES;
2317                                 goto out1;
2318                         }
2319                 }
2320         }
2321 
2322         switch (args->what.type) {
2323         case NF3CHR:
2324         case NF3BLK:
2325                 error = sattr3_to_vattr(
2326                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2327                 if (error)
2328                         goto out;
2329                 if (secpolicy_sys_devices(cr) != 0) {
2330                         resp->status = NFS3ERR_PERM;
2331                         goto out1;
2332                 }
2333                 if (args->what.type == NF3CHR)
2334                         va.va_type = VCHR;
2335                 else
2336                         va.va_type = VBLK;
2337                 va.va_rdev = makedevice(
2338                     args->what.mknoddata3_u.device.spec.specdata1,
2339                     args->what.mknoddata3_u.device.spec.specdata2);
2340                 va.va_mask |= AT_TYPE | AT_RDEV;
2341                 break;
2342         case NF3SOCK:
2343                 error = sattr3_to_vattr(
2344                     &args->what.mknoddata3_u.pipe_attributes, &va);
2345                 if (error)
2346                         goto out;
2347                 va.va_type = VSOCK;
2348                 va.va_mask |= AT_TYPE;
2349                 break;
2350         case NF3FIFO:
2351                 error = sattr3_to_vattr(
2352                     &args->what.mknoddata3_u.pipe_attributes, &va);
2353                 if (error)
2354                         goto out;
2355                 va.va_type = VFIFO;
2356                 va.va_mask |= AT_TYPE;
2357                 break;
2358         default:
2359                 resp->status = NFS3ERR_BADTYPE;
2360                 goto out1;
2361         }
2362 
2363         /*
2364          * Must specify the mode.
2365          */
2366         if (!(va.va_mask & AT_MODE)) {
2367                 resp->status = NFS3ERR_INVAL;
2368                 goto out1;
2369         }
2370 
2371         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2372         name = nfscmd_convname(ca, exi, args->where.name,
2373             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2374 
2375         if (name == NULL) {
2376                 resp->status = NFS3ERR_INVAL;
2377                 goto out1;
2378         }
2379 
2380         excl = EXCL;
2381 
2382         mode = 0;
2383 
2384         error = VOP_CREATE(dvp, name, &va, excl, mode,
2385             &vp, cr, 0, NULL, NULL);
2386 
2387         if (name != args->where.name)
2388                 kmem_free(name, MAXPATHLEN + 1);
2389 
2390         dava.va_mask = AT_ALL;
2391         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2392 
2393         /*
2394          * Force modified data and metadata out to stable storage.
2395          */
2396         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2397 
2398         if (error)
2399                 goto out;
2400 
2401         resp->status = NFS3_OK;
2402 
2403         error = makefh3(&resp->resok.obj.handle, vp, exi);
2404         if (error)
2405                 resp->resok.obj.handle_follows = FALSE;
2406         else
2407                 resp->resok.obj.handle_follows = TRUE;
2408 
2409         va.va_mask = AT_ALL;
2410         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2411 
2412         /*
2413          * Force modified metadata out to stable storage.
2414          *
2415          * if a underlying vp exists, pass it to VOP_FSYNC
2416          */
2417         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2418                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2419         else
2420                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2421 
2422         VN_RELE(vp);
2423 
2424         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2425         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2426         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2427             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2428             MKNOD3res *, resp);
2429         VN_RELE(dvp);
2430         return;
2431 
2432 out:
2433         if (curthread->t_flag & T_WOULDBLOCK) {
2434                 curthread->t_flag &= ~T_WOULDBLOCK;
2435                 resp->status = NFS3ERR_JUKEBOX;
2436         } else
2437                 resp->status = puterrno3(error);
2438 out1:
2439         DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2440             cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2441             MKNOD3res *, resp);
2442         if (dvp != NULL)
2443                 VN_RELE(dvp);
2444         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2445 }
2446 
2447 void *
2448 rfs3_mknod_getfh(MKNOD3args *args)
2449 {
2450 
2451         return (&args->where.dir);
2452 }
2453 
2454 void
2455 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2456     struct svc_req *req, cred_t *cr, bool_t ro)
2457 {
2458         int error = 0;
2459         vnode_t *vp;
2460         struct vattr *bvap;
2461         struct vattr bva;
2462         struct vattr *avap;
2463         struct vattr ava;
2464         vnode_t *targvp = NULL;
2465         struct sockaddr *ca;
2466         char *name = NULL;
2467 
2468         bvap = NULL;
2469         avap = NULL;
2470 
2471         vp = nfs3_fhtovp(&args->object.dir, exi);
2472 
2473         DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2474             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2475             REMOVE3args *, args);
2476 
2477         if (vp == NULL) {
2478                 error = ESTALE;
2479                 goto err;
2480         }
2481 
2482         bva.va_mask = AT_ALL;
2483         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2484         avap = bvap;
2485 
2486         if (vp->v_type != VDIR) {
2487                 resp->status = NFS3ERR_NOTDIR;
2488                 goto err1;
2489         }
2490 
2491         if (args->object.name == nfs3nametoolong) {
2492                 resp->status = NFS3ERR_NAMETOOLONG;
2493                 goto err1;
2494         }
2495 
2496         if (args->object.name == NULL || *(args->object.name) == '\0') {
2497                 resp->status = NFS3ERR_ACCES;
2498                 goto err1;
2499         }
2500 
2501         if (rdonly(ro, vp)) {
2502                 resp->status = NFS3ERR_ROFS;
2503                 goto err1;
2504         }
2505 
2506         if (is_system_labeled()) {
2507                 bslabel_t *clabel = req->rq_label;
2508 
2509                 ASSERT(clabel != NULL);
2510                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2511                     "got client label from request(1)", struct svc_req *, req);
2512 
2513                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2514                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2515                             exi)) {
2516                                 resp->status = NFS3ERR_ACCES;
2517                                 goto err1;
2518                         }
2519                 }
2520         }
2521 
2522         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2523         name = nfscmd_convname(ca, exi, args->object.name,
2524             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2525 
2526         if (name == NULL) {
2527                 resp->status = NFS3ERR_INVAL;
2528                 goto err1;
2529         }
2530 
2531         /*
2532          * Check for a conflict with a non-blocking mandatory share
2533          * reservation and V4 delegations
2534          */
2535         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2536             NULL, cr, NULL, NULL, NULL);
2537         if (error != 0)
2538                 goto err;
2539 
2540         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2541                 resp->status = NFS3ERR_JUKEBOX;
2542                 goto err1;
2543         }
2544 
2545         if (!nbl_need_check(targvp)) {
2546                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2547         } else {
2548                 nbl_start_crit(targvp, RW_READER);
2549                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2550                         error = EACCES;
2551                 } else {
2552                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2553                 }
2554                 nbl_end_crit(targvp);
2555         }
2556         VN_RELE(targvp);
2557         targvp = NULL;
2558 
2559         ava.va_mask = AT_ALL;
2560         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2561 
2562         /*
2563          * Force modified data and metadata out to stable storage.
2564          */
2565         (void) VOP_FSYNC(vp, 0, cr, NULL);
2566 
2567         if (error)
2568                 goto err;
2569 
2570         resp->status = NFS3_OK;
2571         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2572         goto out;
2573 
2574 err:
2575         if (curthread->t_flag & T_WOULDBLOCK) {
2576                 curthread->t_flag &= ~T_WOULDBLOCK;
2577                 resp->status = NFS3ERR_JUKEBOX;
2578         } else
2579                 resp->status = puterrno3(error);
2580 err1:
2581         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2582 out:
2583         DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2584             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2585             REMOVE3res *, resp);
2586 
2587         if (name != NULL && name != args->object.name)
2588                 kmem_free(name, MAXPATHLEN + 1);
2589 
2590         if (vp != NULL)
2591                 VN_RELE(vp);
2592 }
2593 
2594 void *
2595 rfs3_remove_getfh(REMOVE3args *args)
2596 {
2597 
2598         return (&args->object.dir);
2599 }
2600 
2601 void
2602 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2603     struct svc_req *req, cred_t *cr, bool_t ro)
2604 {
2605         int error;
2606         vnode_t *vp;
2607         struct vattr *bvap;
2608         struct vattr bva;
2609         struct vattr *avap;
2610         struct vattr ava;
2611         struct sockaddr *ca;
2612         char *name = NULL;
2613 
2614         bvap = NULL;
2615         avap = NULL;
2616 
2617         vp = nfs3_fhtovp(&args->object.dir, exi);
2618 
2619         DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2620             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2621             RMDIR3args *, args);
2622 
2623         if (vp == NULL) {
2624                 error = ESTALE;
2625                 goto err;
2626         }
2627 
2628         bva.va_mask = AT_ALL;
2629         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2630         avap = bvap;
2631 
2632         if (vp->v_type != VDIR) {
2633                 resp->status = NFS3ERR_NOTDIR;
2634                 goto err1;
2635         }
2636 
2637         if (args->object.name == nfs3nametoolong) {
2638                 resp->status = NFS3ERR_NAMETOOLONG;
2639                 goto err1;
2640         }
2641 
2642         if (args->object.name == NULL || *(args->object.name) == '\0') {
2643                 resp->status = NFS3ERR_ACCES;
2644                 goto err1;
2645         }
2646 
2647         if (rdonly(ro, vp)) {
2648                 resp->status = NFS3ERR_ROFS;
2649                 goto err1;
2650         }
2651 
2652         if (is_system_labeled()) {
2653                 bslabel_t *clabel = req->rq_label;
2654 
2655                 ASSERT(clabel != NULL);
2656                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2657                     "got client label from request(1)", struct svc_req *, req);
2658 
2659                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2660                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2661                             exi)) {
2662                                 resp->status = NFS3ERR_ACCES;
2663                                 goto err1;
2664                         }
2665                 }
2666         }
2667 
2668         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2669         name = nfscmd_convname(ca, exi, args->object.name,
2670             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2671 
2672         if (name == NULL) {
2673                 resp->status = NFS3ERR_INVAL;
2674                 goto err1;
2675         }
2676 
2677         error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2678 
2679         if (name != args->object.name)
2680                 kmem_free(name, MAXPATHLEN + 1);
2681 
2682         ava.va_mask = AT_ALL;
2683         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2684 
2685         /*
2686          * Force modified data and metadata out to stable storage.
2687          */
2688         (void) VOP_FSYNC(vp, 0, cr, NULL);
2689 
2690         if (error) {
2691                 /*
2692                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2693                  * if the directory is not empty.  A System V NFS server
2694                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2695                  * over the wire.
2696                  */
2697                 if (error == EEXIST)
2698                         error = ENOTEMPTY;
2699                 goto err;
2700         }
2701 
2702         resp->status = NFS3_OK;
2703         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2704         goto out;
2705 
2706 err:
2707         if (curthread->t_flag & T_WOULDBLOCK) {
2708                 curthread->t_flag &= ~T_WOULDBLOCK;
2709                 resp->status = NFS3ERR_JUKEBOX;
2710         } else
2711                 resp->status = puterrno3(error);
2712 err1:
2713         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2714 out:
2715         DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2716             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2717             RMDIR3res *, resp);
2718         if (vp != NULL)
2719                 VN_RELE(vp);
2720 
2721 }
2722 
2723 void *
2724 rfs3_rmdir_getfh(RMDIR3args *args)
2725 {
2726 
2727         return (&args->object.dir);
2728 }
2729 
2730 void
2731 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2732     struct svc_req *req, cred_t *cr, bool_t ro)
2733 {
2734         int error = 0;
2735         vnode_t *fvp;
2736         vnode_t *tvp;
2737         vnode_t *targvp;
2738         struct vattr *fbvap;
2739         struct vattr fbva;
2740         struct vattr *favap;
2741         struct vattr fava;
2742         struct vattr *tbvap;
2743         struct vattr tbva;
2744         struct vattr *tavap;
2745         struct vattr tava;
2746         nfs_fh3 *fh3;
2747         struct exportinfo *to_exi;
2748         vnode_t *srcvp = NULL;
2749         bslabel_t *clabel;
2750         struct sockaddr *ca;
2751         char *name = NULL;
2752         char *toname = NULL;
2753 
2754         fbvap = NULL;
2755         favap = NULL;
2756         tbvap = NULL;
2757         tavap = NULL;
2758         tvp = NULL;
2759 
2760         fvp = nfs3_fhtovp(&args->from.dir, exi);
2761 
2762         DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2763             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2764             RENAME3args *, args);
2765 
2766         if (fvp == NULL) {
2767                 error = ESTALE;
2768                 goto err;
2769         }
2770 
2771         if (is_system_labeled()) {
2772                 clabel = req->rq_label;
2773                 ASSERT(clabel != NULL);
2774                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2775                     "got client label from request(1)", struct svc_req *, req);
2776 
2777                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2778                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2779                             exi)) {
2780                                 resp->status = NFS3ERR_ACCES;
2781                                 goto err1;
2782                         }
2783                 }
2784         }
2785 
2786         fbva.va_mask = AT_ALL;
2787         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2788         favap = fbvap;
2789 
2790         fh3 = &args->to.dir;
2791         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2792         if (to_exi == NULL) {
2793                 resp->status = NFS3ERR_ACCES;
2794                 goto err1;
2795         }
2796         exi_rele(to_exi);
2797 
2798         if (to_exi != exi) {
2799                 resp->status = NFS3ERR_XDEV;
2800                 goto err1;
2801         }
2802 
2803         tvp = nfs3_fhtovp(&args->to.dir, exi);
2804         if (tvp == NULL) {
2805                 error = ESTALE;
2806                 goto err;
2807         }
2808 
2809         tbva.va_mask = AT_ALL;
2810         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2811         tavap = tbvap;
2812 
2813         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2814                 resp->status = NFS3ERR_NOTDIR;
2815                 goto err1;
2816         }
2817 
2818         if (args->from.name == nfs3nametoolong ||
2819             args->to.name == nfs3nametoolong) {
2820                 resp->status = NFS3ERR_NAMETOOLONG;
2821                 goto err1;
2822         }
2823         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2824             args->to.name == NULL || *(args->to.name) == '\0') {
2825                 resp->status = NFS3ERR_ACCES;
2826                 goto err1;
2827         }
2828 
2829         if (rdonly(ro, tvp)) {
2830                 resp->status = NFS3ERR_ROFS;
2831                 goto err1;
2832         }
2833 
2834         if (is_system_labeled()) {
2835                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2836                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2837                             exi)) {
2838                                 resp->status = NFS3ERR_ACCES;
2839                                 goto err1;
2840                         }
2841                 }
2842         }
2843 
2844         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2845         name = nfscmd_convname(ca, exi, args->from.name,
2846             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2847 
2848         if (name == NULL) {
2849                 resp->status = NFS3ERR_INVAL;
2850                 goto err1;
2851         }
2852 
2853         toname = nfscmd_convname(ca, exi, args->to.name,
2854             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2855 
2856         if (toname == NULL) {
2857                 resp->status = NFS3ERR_INVAL;
2858                 goto err1;
2859         }
2860 
2861         /*
2862          * Check for a conflict with a non-blocking mandatory share
2863          * reservation or V4 delegations.
2864          */
2865         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2866             NULL, cr, NULL, NULL, NULL);
2867         if (error != 0)
2868                 goto err;
2869 
2870         /*
2871          * If we rename a delegated file we should recall the
2872          * delegation, since future opens should fail or would
2873          * refer to a new file.
2874          */
2875         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2876                 resp->status = NFS3ERR_JUKEBOX;
2877                 goto err1;
2878         }
2879 
2880         /*
2881          * Check for renaming over a delegated file.  Check nfs4_deleg_policy
2882          * first to avoid VOP_LOOKUP if possible.
2883          */
2884         if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2885             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2886             NULL, NULL, NULL) == 0) {
2887 
2888                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2889                         VN_RELE(targvp);
2890                         resp->status = NFS3ERR_JUKEBOX;
2891                         goto err1;
2892                 }
2893                 VN_RELE(targvp);
2894         }
2895 
2896         if (!nbl_need_check(srcvp)) {
2897                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2898         } else {
2899                 nbl_start_crit(srcvp, RW_READER);
2900                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2901                         error = EACCES;
2902                 else
2903                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2904                 nbl_end_crit(srcvp);
2905         }
2906         if (error == 0)
2907                 vn_renamepath(tvp, srcvp, args->to.name,
2908                     strlen(args->to.name));
2909         VN_RELE(srcvp);
2910         srcvp = NULL;
2911 
2912         fava.va_mask = AT_ALL;
2913         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2914         tava.va_mask = AT_ALL;
2915         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2916 
2917         /*
2918          * Force modified data and metadata out to stable storage.
2919          */
2920         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2921         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2922 
2923         if (error)
2924                 goto err;
2925 
2926         resp->status = NFS3_OK;
2927         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2928         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2929         goto out;
2930 
2931 err:
2932         if (curthread->t_flag & T_WOULDBLOCK) {
2933                 curthread->t_flag &= ~T_WOULDBLOCK;
2934                 resp->status = NFS3ERR_JUKEBOX;
2935         } else {
2936                 resp->status = puterrno3(error);
2937         }
2938 err1:
2939         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2940         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2941 
2942 out:
2943         if (name != NULL && name != args->from.name)
2944                 kmem_free(name, MAXPATHLEN + 1);
2945         if (toname != NULL && toname != args->to.name)
2946                 kmem_free(toname, MAXPATHLEN + 1);
2947 
2948         DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2949             cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2950             RENAME3res *, resp);
2951         if (fvp != NULL)
2952                 VN_RELE(fvp);
2953         if (tvp != NULL)
2954                 VN_RELE(tvp);
2955 }
2956 
2957 void *
2958 rfs3_rename_getfh(RENAME3args *args)
2959 {
2960 
2961         return (&args->from.dir);
2962 }
2963 
2964 void
2965 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2966     struct svc_req *req, cred_t *cr, bool_t ro)
2967 {
2968         int error;
2969         vnode_t *vp;
2970         vnode_t *dvp;
2971         struct vattr *vap;
2972         struct vattr va;
2973         struct vattr *bvap;
2974         struct vattr bva;
2975         struct vattr *avap;
2976         struct vattr ava;
2977         nfs_fh3 *fh3;
2978         struct exportinfo *to_exi;
2979         bslabel_t *clabel;
2980         struct sockaddr *ca;
2981         char *name = NULL;
2982 
2983         vap = NULL;
2984         bvap = NULL;
2985         avap = NULL;
2986         dvp = NULL;
2987 
2988         vp = nfs3_fhtovp(&args->file, exi);
2989 
2990         DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2991             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2992             LINK3args *, args);
2993 
2994         if (vp == NULL) {
2995                 error = ESTALE;
2996                 goto out;
2997         }
2998 
2999         va.va_mask = AT_ALL;
3000         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3001 
3002         fh3 = &args->link.dir;
3003         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3004         if (to_exi == NULL) {
3005                 resp->status = NFS3ERR_ACCES;
3006                 goto out1;
3007         }
3008         exi_rele(to_exi);
3009 
3010         if (to_exi != exi) {
3011                 resp->status = NFS3ERR_XDEV;
3012                 goto out1;
3013         }
3014 
3015         if (is_system_labeled()) {
3016                 clabel = req->rq_label;
3017 
3018                 ASSERT(clabel != NULL);
3019                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3020                     "got client label from request(1)", struct svc_req *, req);
3021 
3022                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3023                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3024                             exi)) {
3025                                 resp->status = NFS3ERR_ACCES;
3026                                 goto out1;
3027                         }
3028                 }
3029         }
3030 
3031         dvp = nfs3_fhtovp(&args->link.dir, exi);
3032         if (dvp == NULL) {
3033                 error = ESTALE;
3034                 goto out;
3035         }
3036 
3037         bva.va_mask = AT_ALL;
3038         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3039 
3040         if (dvp->v_type != VDIR) {
3041                 resp->status = NFS3ERR_NOTDIR;
3042                 goto out1;
3043         }
3044 
3045         if (args->link.name == nfs3nametoolong) {
3046                 resp->status = NFS3ERR_NAMETOOLONG;
3047                 goto out1;
3048         }
3049 
3050         if (args->link.name == NULL || *(args->link.name) == '\0') {
3051                 resp->status = NFS3ERR_ACCES;
3052                 goto out1;
3053         }
3054 
3055         if (rdonly(ro, dvp)) {
3056                 resp->status = NFS3ERR_ROFS;
3057                 goto out1;
3058         }
3059 
3060         if (is_system_labeled()) {
3061                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3062                     "got client label from request(1)", struct svc_req *, req);
3063 
3064                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3065                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3066                             exi)) {
3067                                 resp->status = NFS3ERR_ACCES;
3068                                 goto out1;
3069                         }
3070                 }
3071         }
3072 
3073         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3074         name = nfscmd_convname(ca, exi, args->link.name,
3075             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3076 
3077         if (name == NULL) {
3078                 resp->status = NFS3ERR_SERVERFAULT;
3079                 goto out1;
3080         }
3081 
3082         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3083 
3084         va.va_mask = AT_ALL;
3085         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3086         ava.va_mask = AT_ALL;
3087         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3088 
3089         /*
3090          * Force modified data and metadata out to stable storage.
3091          */
3092         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3093         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3094 
3095         if (error)
3096                 goto out;
3097 
3098         VN_RELE(dvp);
3099 
3100         resp->status = NFS3_OK;
3101         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3102         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3103 
3104         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3105             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3106             LINK3res *, resp);
3107 
3108         VN_RELE(vp);
3109 
3110         return;
3111 
3112 out:
3113         if (curthread->t_flag & T_WOULDBLOCK) {
3114                 curthread->t_flag &= ~T_WOULDBLOCK;
3115                 resp->status = NFS3ERR_JUKEBOX;
3116         } else
3117                 resp->status = puterrno3(error);
3118 out1:
3119         if (name != NULL && name != args->link.name)
3120                 kmem_free(name, MAXPATHLEN + 1);
3121 
3122         DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3123             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3124             LINK3res *, resp);
3125 
3126         if (vp != NULL)
3127                 VN_RELE(vp);
3128         if (dvp != NULL)
3129                 VN_RELE(dvp);
3130         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3131         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3132 }
3133 
3134 void *
3135 rfs3_link_getfh(LINK3args *args)
3136 {
3137 
3138         return (&args->file);
3139 }
3140 
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose length is specified by
 * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
 * is not, then we need to check to make sure that this error does not
 * need to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is comprised of the following:
 *
 * status - 1 * BYTES_PER_XDR_UNIT
 * attr. flag - 1 * BYTES_PER_XDR_UNIT
 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
 * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * end of list - 1 * BYTES_PER_XDR_UNIT
 * end of file - 1 * BYTES_PER_XDR_UNIT
 * Name length of directory entry, rounded up to a multiple of
 * BYTES_PER_XDR_UNIT
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        ((1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT))
3169 
3170 /* ARGSUSED */
3171 void
3172 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3173     struct svc_req *req, cred_t *cr, bool_t ro)
3174 {
3175         int error;
3176         vnode_t *vp;
3177         struct vattr *vap;
3178         struct vattr va;
3179         struct iovec iov;
3180         struct uio uio;
3181         char *data;
3182         int iseof;
3183         int bufsize;
3184         int namlen;
3185         uint_t count;
3186         struct sockaddr *ca;
3187 
3188         vap = NULL;
3189 
3190         vp = nfs3_fhtovp(&args->dir, exi);
3191 
3192         DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3193             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3194             READDIR3args *, args);
3195 
3196         if (vp == NULL) {
3197                 error = ESTALE;
3198                 goto out;
3199         }
3200 
3201         if (is_system_labeled()) {
3202                 bslabel_t *clabel = req->rq_label;
3203 
3204                 ASSERT(clabel != NULL);
3205                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3206                     "got client label from request(1)", struct svc_req *, req);
3207 
3208                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3209                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3210                             exi)) {
3211                                 resp->status = NFS3ERR_ACCES;
3212                                 goto out1;
3213                         }
3214                 }
3215         }
3216 
3217         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3218 
3219         va.va_mask = AT_ALL;
3220         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3221 
3222         if (vp->v_type != VDIR) {
3223                 resp->status = NFS3ERR_NOTDIR;
3224                 goto out1;
3225         }
3226 
3227         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3228         if (error)
3229                 goto out;
3230 
3231         /*
3232          * Now don't allow arbitrary count to alloc;
3233          * allow the maximum not to exceed rfs3_tsize()
3234          */
3235         if (args->count > rfs3_tsize(req))
3236                 args->count = rfs3_tsize(req);
3237 
3238         /*
3239          * Make sure that there is room to read at least one entry
3240          * if any are available.
3241          */
3242         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3243                 count = DIRENT64_RECLEN(MAXNAMELEN);
3244         else
3245                 count = args->count;
3246 
3247         data = kmem_alloc(count, KM_SLEEP);
3248 
3249         iov.iov_base = data;
3250         iov.iov_len = count;
3251         uio.uio_iov = &iov;
3252         uio.uio_iovcnt = 1;
3253         uio.uio_segflg = UIO_SYSSPACE;
3254         uio.uio_extflg = UIO_COPY_CACHED;
3255         uio.uio_loffset = (offset_t)args->cookie;
3256         uio.uio_resid = count;
3257 
3258         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3259 
3260         va.va_mask = AT_ALL;
3261         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3262 
3263         if (error) {
3264                 kmem_free(data, count);
3265                 goto out;
3266         }
3267 
3268         /*
3269          * If the count was not large enough to be able to guarantee
3270          * to be able to return at least one entry, then need to
3271          * check to see if NFS3ERR_TOOSMALL should be returned.
3272          */
3273         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3274                 /*
3275                  * bufsize is used to keep track of the size of the response.
3276                  * It is primed with:
3277                  *      1 for the status +
3278                  *      1 for the dir_attributes.attributes boolean +
3279                  *      2 for the cookie verifier
3280                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3281                  * to bytes.  If there are directory attributes to be
3282                  * returned, then:
3283                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3284                  * time BYTES_PER_XDR_UNIT is added to account for them.
3285                  */
3286                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3287                 if (vap != NULL)
3288                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3289                 /*
3290                  * An entry is composed of:
3291                  *      1 for the true/false list indicator +
3292                  *      2 for the fileid +
3293                  *      1 for the length of the name +
3294                  *      2 for the cookie +
3295                  * all times BYTES_PER_XDR_UNIT to convert from
3296                  * XDR units to bytes, plus the length of the name
3297                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3298                  */
3299                 if (count != uio.uio_resid) {
3300                         namlen = strlen(((struct dirent64 *)data)->d_name);
3301                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3302                             roundup(namlen, BYTES_PER_XDR_UNIT);
3303                 }
3304                 /*
3305                  * We need to check to see if the number of bytes left
3306                  * to go into the buffer will actually fit into the
3307                  * buffer.  This is calculated as the size of this
3308                  * entry plus:
3309                  *      1 for the true/false list indicator +
3310                  *      1 for the eof indicator
3311                  * times BYTES_PER_XDR_UNIT to convert from from
3312                  * XDR units to bytes.
3313                  */
3314                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3315                 if (bufsize > args->count) {
3316                         kmem_free(data, count);
3317                         resp->status = NFS3ERR_TOOSMALL;
3318                         goto out1;
3319                 }
3320         }
3321 
3322         /*
3323          * Have a valid readir buffer for the native character
3324          * set. Need to check if a conversion is necessary and
3325          * potentially rewrite the whole buffer. Note that if the
3326          * conversion expands names enough, the structure may not
3327          * fit. In this case, we need to drop entries until if fits
3328          * and patch the counts in order that the next readdir will
3329          * get the correct entries.
3330          */
3331         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3332         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3333 
3334 
3335         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3336 
3337 #if 0 /* notyet */
3338         /*
3339          * Don't do this.  It causes local disk writes when just
3340          * reading the file and the overhead is deemed larger
3341          * than the benefit.
3342          */
3343         /*
3344          * Force modified metadata out to stable storage.
3345          */
3346         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3347 #endif
3348 
3349         resp->status = NFS3_OK;
3350         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3351         resp->resok.cookieverf = 0;
3352         resp->resok.reply.entries = (entry3 *)data;
3353         resp->resok.reply.eof = iseof;
3354         resp->resok.size = count - uio.uio_resid;
3355         resp->resok.count = args->count;
3356         resp->resok.freecount = count;
3357 
3358         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3359             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3360             READDIR3res *, resp);
3361 
3362         VN_RELE(vp);
3363 
3364         return;
3365 
3366 out:
3367         if (curthread->t_flag & T_WOULDBLOCK) {
3368                 curthread->t_flag &= ~T_WOULDBLOCK;
3369                 resp->status = NFS3ERR_JUKEBOX;
3370         } else
3371                 resp->status = puterrno3(error);
3372 out1:
3373         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3374 
3375         DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3376             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3377             READDIR3res *, resp);
3378 
3379         if (vp != NULL) {
3380                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3381                 VN_RELE(vp);
3382         }
3383         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3384 }
3385 
3386 void *
3387 rfs3_readdir_getfh(READDIR3args *args)
3388 {
3389 
3390         return (&args->dir);
3391 }
3392 
3393 void
3394 rfs3_readdir_free(READDIR3res *resp)
3395 {
3396 
3397         if (resp->status == NFS3_OK)
3398                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3399 }
3400 
/*
 * nextdp(dp) yields a pointer to the dirent64 record located d_reclen
 * bytes past dp.  Any earlier definition of nextdp is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3405 
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry, rounded up to a multiple of
 * BYTES_PER_XDR_UNIT
 *
 * The macro argument is parenthesized in the expansion, matching
 * NFS3_READDIR_MIN_COUNT.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + \
        NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3429 
/*
 * Initial value of the rd_unit local in rfs3_readdirplus(); defaults to
 * MAXBSIZE.  NOTE(review): presumably the granularity of directory reads
 * there -- confirm against the remainder of rfs3_readdirplus().
 */
static int rfs3_readdir_unit = MAXBSIZE;
3431 
3432 /* ARGSUSED */
3433 void
3434 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3435     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3436 {
3437         int error;
3438         vnode_t *vp;
3439         struct vattr *vap;
3440         struct vattr va;
3441         struct iovec iov;
3442         struct uio uio;
3443         char *data;
3444         int iseof;
3445         struct dirent64 *dp;
3446         vnode_t *nvp;
3447         struct vattr *nvap;
3448         struct vattr nva;
3449         entryplus3_info *infop = NULL;
3450         int size = 0;
3451         int nents = 0;
3452         int bufsize = 0;
3453         int entrysize = 0;
3454         int tofit = 0;
3455         int rd_unit = rfs3_readdir_unit;
3456         int prev_len;
3457         int space_left;
3458         int i;
3459         uint_t *namlen = NULL;
3460         char *ndata = NULL;
3461         struct sockaddr *ca;
3462         size_t ret;
3463 
3464         vap = NULL;
3465 
3466         vp = nfs3_fhtovp(&args->dir, exi);
3467 
3468         DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3469             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3470             READDIRPLUS3args *, args);
3471 
3472         if (vp == NULL) {
3473                 error = ESTALE;
3474                 goto out;
3475         }
3476 
3477         if (is_system_labeled()) {
3478                 bslabel_t *clabel = req->rq_label;
3479 
3480                 ASSERT(clabel != NULL);
3481                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3482                     char *, "got client label from request(1)",
3483                     struct svc_req *, req);
3484 
3485                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3486                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3487                             exi)) {
3488                                 resp->status = NFS3ERR_ACCES;
3489                                 goto out1;
3490                         }
3491                 }
3492         }
3493 
3494         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3495 
3496         va.va_mask = AT_ALL;
3497         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3498 
3499         if (vp->v_type != VDIR) {
3500                 error = ENOTDIR;
3501                 goto out;
3502         }
3503 
3504         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3505         if (error)
3506                 goto out;
3507 
3508         /*
3509          * Don't allow arbitrary counts for allocation
3510          */
3511         if (args->maxcount > rfs3_tsize(req))
3512                 args->maxcount = rfs3_tsize(req);
3513 
3514         /*
3515          * Make sure that there is room to read at least one entry
3516          * if any are available
3517          */
3518         args->dircount = MIN(args->dircount, args->maxcount);
3519 
3520         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3521                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3522 
3523         /*
3524          * This allocation relies on a minimum directory entry
3525          * being roughly 24 bytes.  Therefore, the namlen array
3526          * will have enough space based on the maximum number of
3527          * entries to read.
3528          */
3529         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3530 
3531         space_left = args->dircount;
3532         data = kmem_alloc(args->dircount, KM_SLEEP);
3533         dp = (struct dirent64 *)data;
3534         uio.uio_iov = &iov;
3535         uio.uio_iovcnt = 1;
3536         uio.uio_segflg = UIO_SYSSPACE;
3537         uio.uio_extflg = UIO_COPY_CACHED;
3538         uio.uio_loffset = (offset_t)args->cookie;
3539 
3540         /*
3541          * bufsize is used to keep track of the size of the response as we
3542          * get post op attributes and filehandles for each entry.  This is
3543          * an optimization as the server may have read more entries than will
3544          * fit in the buffer specified by maxcount.  We stop calculating
3545          * post op attributes and filehandles once we have exceeded maxcount.
3546          * This will minimize the effect of truncation.
3547          *
3548          * It is primed with:
3549          *      1 for the status +
3550          *      1 for the dir_attributes.attributes boolean +
3551          *      2 for the cookie verifier
3552          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3553          * to bytes.  If there are directory attributes to be
3554          * returned, then:
3555          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3556          * time BYTES_PER_XDR_UNIT is added to account for them.
3557          */
3558         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3559         if (vap != NULL)
3560                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3561 
3562 getmoredents:
3563         /*
3564          * Here we make a check so that our read unit is not larger than
3565          * the space left in the buffer.
3566          */
3567         rd_unit = MIN(rd_unit, space_left);
3568         iov.iov_base = (char *)dp;
3569         iov.iov_len = rd_unit;
3570         uio.uio_resid = rd_unit;
3571         prev_len = rd_unit;
3572 
3573         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3574 
3575         if (error) {
3576                 kmem_free(data, args->dircount);
3577                 goto out;
3578         }
3579 
3580         if (uio.uio_resid == prev_len && !iseof) {
3581                 if (nents == 0) {
3582                         kmem_free(data, args->dircount);
3583                         resp->status = NFS3ERR_TOOSMALL;
3584                         goto out1;
3585                 }
3586 
3587                 /*
3588                  * We could not get any more entries, so get the attributes
3589                  * and filehandle for the entries already obtained.
3590                  */
3591                 goto good;
3592         }
3593 
3594         /*
3595          * We estimate the size of the response by assuming the
3596          * entry exists and attributes and filehandle are also valid
3597          */
3598         for (size = prev_len - uio.uio_resid;
3599             size > 0;
3600             size -= dp->d_reclen, dp = nextdp(dp)) {
3601 
3602                 if (dp->d_ino == 0) {
3603                         nents++;
3604                         continue;
3605                 }
3606 
3607                 namlen[nents] = strlen(dp->d_name);
3608                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3609 
3610                 /*
3611                  * We need to check to see if the number of bytes left
3612                  * to go into the buffer will actually fit into the
3613                  * buffer.  This is calculated as the size of this
3614                  * entry plus:
3615                  *      1 for the true/false list indicator +
3616                  *      1 for the eof indicator
3617                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3618                  * to bytes.
3619                  *
3620                  * Also check the dircount limit against the first entry read
3621                  *
3622                  */
3623                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3624                 if (bufsize + tofit > args->maxcount) {
3625                         /*
3626                          * We make a check here to see if this was the
3627                          * first entry being measured.  If so, then maxcount
3628                          * was too small to begin with and so we need to
3629                          * return with NFS3ERR_TOOSMALL.
3630                          */
3631                         if (nents == 0) {
3632                                 kmem_free(data, args->dircount);
3633                                 resp->status = NFS3ERR_TOOSMALL;
3634                                 goto out1;
3635                         }
3636                         iseof = FALSE;
3637                         goto good;
3638                 }
3639                 bufsize += entrysize;
3640                 nents++;
3641         }
3642 
3643         /*
3644          * If there is enough room to fit at least 1 more entry including
3645          * post op attributes and filehandle in the buffer AND that we haven't
3646          * exceeded dircount then go back and get some more.
3647          */
3648         if (!iseof &&
3649             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3650                 space_left -= (prev_len - uio.uio_resid);
3651                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3652                         goto getmoredents;
3653 
3654                 /* else, fall through */
3655         }
3656 good:
3657         va.va_mask = AT_ALL;
3658         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3659 
3660         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3661 
3662         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3663         resp->resok.infop = infop;
3664 
3665         dp = (struct dirent64 *)data;
3666         for (i = 0; i < nents; i++) {
3667 
3668                 if (dp->d_ino == 0) {
3669                         infop[i].attr.attributes = FALSE;
3670                         infop[i].fh.handle_follows = FALSE;
3671                         dp = nextdp(dp);
3672                         continue;
3673                 }
3674 
3675                 infop[i].namelen = namlen[i];
3676 
3677                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3678                     NULL, NULL, NULL);
3679                 if (error) {
3680                         infop[i].attr.attributes = FALSE;
3681                         infop[i].fh.handle_follows = FALSE;
3682                         dp = nextdp(dp);
3683                         continue;
3684                 }
3685 
3686                 nva.va_mask = AT_ALL;
3687                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3688 
3689                 /* Lie about the object type for a referral */
3690                 if (vn_is_nfs_reparse(nvp, cr))
3691                         nvap->va_type = VLNK;
3692 
3693                 if (vn_ismntpt(nvp)) {
3694                         infop[i].attr.attributes = FALSE;
3695                         infop[i].fh.handle_follows = FALSE;
3696                 } else {
3697                         vattr_to_post_op_attr(nvap, &infop[i].attr);
3698 
3699                         error = makefh3(&infop[i].fh.handle, nvp, exi);
3700                         if (!error)
3701                                 infop[i].fh.handle_follows = TRUE;
3702                         else
3703                                 infop[i].fh.handle_follows = FALSE;
3704                 }
3705 
3706                 VN_RELE(nvp);
3707                 dp = nextdp(dp);
3708         }
3709 
3710         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3711         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3712         if (ndata == NULL)
3713                 ndata = data;
3714 
3715         if (ret > 0) {
3716                 /*
3717                  * We had to drop one or more entries in order to fit
3718                  * during the character conversion.  We need to patch
3719                  * up the size and eof info.
3720                  */
3721                 if (iseof)
3722                         iseof = FALSE;
3723 
3724                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3725                     nents, ret);
3726         }
3727 
3728 
3729 #if 0 /* notyet */
3730         /*
3731          * Don't do this.  It causes local disk writes when just
3732          * reading the file and the overhead is deemed larger
3733          * than the benefit.
3734          */
3735         /*
3736          * Force modified metadata out to stable storage.
3737          */
3738         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3739 #endif
3740 
3741         kmem_free(namlen, args->dircount);
3742 
3743         resp->status = NFS3_OK;
3744         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3745         resp->resok.cookieverf = 0;
3746         resp->resok.reply.entries = (entryplus3 *)ndata;
3747         resp->resok.reply.eof = iseof;
3748         resp->resok.size = nents;
3749         resp->resok.count = args->dircount - ret;
3750         resp->resok.maxcount = args->maxcount;
3751 
3752         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3753             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3754             READDIRPLUS3res *, resp);
3755 
3756         VN_RELE(vp);
3757 
3758         return;
3759 
3760 out:
3761         if (curthread->t_flag & T_WOULDBLOCK) {
3762                 curthread->t_flag &= ~T_WOULDBLOCK;
3763                 resp->status = NFS3ERR_JUKEBOX;
3764         } else {
3765                 resp->status = puterrno3(error);
3766         }
3767 out1:
3768         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3769 
3770         DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3771             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3772             READDIRPLUS3res *, resp);
3773 
3774         if (vp != NULL) {
3775                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3776                 VN_RELE(vp);
3777         }
3778 
3779         if (namlen != NULL)
3780                 kmem_free(namlen, args->dircount);
3781 
3782         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3783 }
3784 
3785 void *
3786 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3787 {
3788 
3789         return (&args->dir);
3790 }
3791 
3792 void
3793 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3794 {
3795 
3796         if (resp->status == NFS3_OK) {
3797                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3798                 kmem_free(resp->resok.infop,
3799                     resp->resok.size * sizeof (struct entryplus3_info));
3800         }
3801 }
3802 
3803 /* ARGSUSED */
3804 void
3805 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3806     struct svc_req *req, cred_t *cr, bool_t ro)
3807 {
3808         int error;
3809         vnode_t *vp;
3810         struct vattr *vap;
3811         struct vattr va;
3812         struct statvfs64 sb;
3813 
3814         vap = NULL;
3815 
3816         vp = nfs3_fhtovp(&args->fsroot, exi);
3817 
3818         DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3819             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3820             FSSTAT3args *, args);
3821 
3822         if (vp == NULL) {
3823                 error = ESTALE;
3824                 goto out;
3825         }
3826 
3827         if (is_system_labeled()) {
3828                 bslabel_t *clabel = req->rq_label;
3829 
3830                 ASSERT(clabel != NULL);
3831                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3832                     "got client label from request(1)", struct svc_req *, req);
3833 
3834                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3835                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3836                             exi)) {
3837                                 resp->status = NFS3ERR_ACCES;
3838                                 goto out1;
3839                         }
3840                 }
3841         }
3842 
3843         error = VFS_STATVFS(vp->v_vfsp, &sb);
3844 
3845         va.va_mask = AT_ALL;
3846         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3847 
3848         if (error)
3849                 goto out;
3850 
3851         resp->status = NFS3_OK;
3852         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3853         if (sb.f_blocks != (fsblkcnt64_t)-1)
3854                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3855         else
3856                 resp->resok.tbytes = (size3)sb.f_blocks;
3857         if (sb.f_bfree != (fsblkcnt64_t)-1)
3858                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3859         else
3860                 resp->resok.fbytes = (size3)sb.f_bfree;
3861         if (sb.f_bavail != (fsblkcnt64_t)-1)
3862                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3863         else
3864                 resp->resok.abytes = (size3)sb.f_bavail;
3865         resp->resok.tfiles = (size3)sb.f_files;
3866         resp->resok.ffiles = (size3)sb.f_ffree;
3867         resp->resok.afiles = (size3)sb.f_favail;
3868         resp->resok.invarsec = 0;
3869 
3870         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3871             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3872             FSSTAT3res *, resp);
3873         VN_RELE(vp);
3874 
3875         return;
3876 
3877 out:
3878         if (curthread->t_flag & T_WOULDBLOCK) {
3879                 curthread->t_flag &= ~T_WOULDBLOCK;
3880                 resp->status = NFS3ERR_JUKEBOX;
3881         } else
3882                 resp->status = puterrno3(error);
3883 out1:
3884         DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3885             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3886             FSSTAT3res *, resp);
3887 
3888         if (vp != NULL)
3889                 VN_RELE(vp);
3890         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3891 }
3892 
3893 void *
3894 rfs3_fsstat_getfh(FSSTAT3args *args)
3895 {
3896 
3897         return (&args->fsroot);
3898 }
3899 
3900 /* ARGSUSED */
3901 void
3902 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3903     struct svc_req *req, cred_t *cr, bool_t ro)
3904 {
3905         vnode_t *vp;
3906         struct vattr *vap;
3907         struct vattr va;
3908         uint32_t xfer_size;
3909         ulong_t l = 0;
3910         int error;
3911 
3912         vp = nfs3_fhtovp(&args->fsroot, exi);
3913 
3914         DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3915             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3916             FSINFO3args *, args);
3917 
3918         if (vp == NULL) {
3919                 if (curthread->t_flag & T_WOULDBLOCK) {
3920                         curthread->t_flag &= ~T_WOULDBLOCK;
3921                         resp->status = NFS3ERR_JUKEBOX;
3922                 } else
3923                         resp->status = NFS3ERR_STALE;
3924                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3925                 goto out;
3926         }
3927 
3928         if (is_system_labeled()) {
3929                 bslabel_t *clabel = req->rq_label;
3930 
3931                 ASSERT(clabel != NULL);
3932                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3933                     "got client label from request(1)", struct svc_req *, req);
3934 
3935                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3936                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3937                             exi)) {
3938                                 resp->status = NFS3ERR_STALE;
3939                                 vattr_to_post_op_attr(NULL,
3940                                     &resp->resfail.obj_attributes);
3941                                 goto out;
3942                         }
3943                 }
3944         }
3945 
3946         va.va_mask = AT_ALL;
3947         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3948 
3949         resp->status = NFS3_OK;
3950         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3951         xfer_size = rfs3_tsize(req);
3952         resp->resok.rtmax = xfer_size;
3953         resp->resok.rtpref = xfer_size;
3954         resp->resok.rtmult = DEV_BSIZE;
3955         resp->resok.wtmax = xfer_size;
3956         resp->resok.wtpref = xfer_size;
3957         resp->resok.wtmult = DEV_BSIZE;
3958         resp->resok.dtpref = MAXBSIZE;
3959 
3960         /*
3961          * Large file spec: want maxfilesize based on limit of
3962          * underlying filesystem.  We can guess 2^31-1 if need be.
3963          */
3964         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3965         if (error) {
3966                 resp->status = puterrno3(error);
3967                 goto out;
3968         }
3969 
3970         /*
3971          * If the underlying file system does not support _PC_FILESIZEBITS,
3972          * return a reasonable default. Note that error code on VOP_PATHCONF
3973          * will be 0, even if the underlying file system does not support
3974          * _PC_FILESIZEBITS.
3975          */
3976         if (l == (ulong_t)-1) {
3977                 resp->resok.maxfilesize = MAXOFF32_T;
3978         } else {
3979                 if (l >= (sizeof (uint64_t) * 8))
3980                         resp->resok.maxfilesize = INT64_MAX;
3981                 else
3982                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3983         }
3984 
3985         resp->resok.time_delta.seconds = 0;
3986         resp->resok.time_delta.nseconds = 1000;
3987         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3988             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3989 
3990         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3991             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3992             FSINFO3res *, resp);
3993 
3994         VN_RELE(vp);
3995 
3996         return;
3997 
3998 out:
3999         DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
4000             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
4001             FSINFO3res *, resp);
4002         if (vp != NULL)
4003                 VN_RELE(vp);
4004 }
4005 
4006 void *
4007 rfs3_fsinfo_getfh(FSINFO3args *args)
4008 {
4009         return (&args->fsroot);
4010 }
4011 
4012 /* ARGSUSED */
4013 void
4014 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4015     struct svc_req *req, cred_t *cr, bool_t ro)
4016 {
4017         int error;
4018         vnode_t *vp;
4019         struct vattr *vap;
4020         struct vattr va;
4021         ulong_t val;
4022 
4023         vap = NULL;
4024 
4025         vp = nfs3_fhtovp(&args->object, exi);
4026 
4027         DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4028             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4029             PATHCONF3args *, args);
4030 
4031         if (vp == NULL) {
4032                 error = ESTALE;
4033                 goto out;
4034         }
4035 
4036         if (is_system_labeled()) {
4037                 bslabel_t *clabel = req->rq_label;
4038 
4039                 ASSERT(clabel != NULL);
4040                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4041                     "got client label from request(1)", struct svc_req *, req);
4042 
4043                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4044                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4045                             exi)) {
4046                                 resp->status = NFS3ERR_ACCES;
4047                                 goto out1;
4048                         }
4049                 }
4050         }
4051 
4052         va.va_mask = AT_ALL;
4053         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4054 
4055         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4056         if (error)
4057                 goto out;
4058         resp->resok.info.link_max = (uint32)val;
4059 
4060         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4061         if (error)
4062                 goto out;
4063         resp->resok.info.name_max = (uint32)val;
4064 
4065         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4066         if (error)
4067                 goto out;
4068         if (val == 1)
4069                 resp->resok.info.no_trunc = TRUE;
4070         else
4071                 resp->resok.info.no_trunc = FALSE;
4072 
4073         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4074         if (error)
4075                 goto out;
4076         if (val == 1)
4077                 resp->resok.info.chown_restricted = TRUE;
4078         else
4079                 resp->resok.info.chown_restricted = FALSE;
4080 
4081         resp->status = NFS3_OK;
4082         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4083         resp->resok.info.case_insensitive = FALSE;
4084         resp->resok.info.case_preserving = TRUE;
4085         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4086             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4087             PATHCONF3res *, resp);
4088         VN_RELE(vp);
4089         return;
4090 
4091 out:
4092         if (curthread->t_flag & T_WOULDBLOCK) {
4093                 curthread->t_flag &= ~T_WOULDBLOCK;
4094                 resp->status = NFS3ERR_JUKEBOX;
4095         } else
4096                 resp->status = puterrno3(error);
4097 out1:
4098         DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4099             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4100             PATHCONF3res *, resp);
4101         if (vp != NULL)
4102                 VN_RELE(vp);
4103         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4104 }
4105 
4106 void *
4107 rfs3_pathconf_getfh(PATHCONF3args *args)
4108 {
4109 
4110         return (&args->object);
4111 }
4112 
4113 void
4114 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4115     struct svc_req *req, cred_t *cr, bool_t ro)
4116 {
4117         nfs3_srv_t *ns;
4118         int error;
4119         vnode_t *vp;
4120         struct vattr *bvap;
4121         struct vattr bva;
4122         struct vattr *avap;
4123         struct vattr ava;
4124 
4125         bvap = NULL;
4126         avap = NULL;
4127 
4128         vp = nfs3_fhtovp(&args->file, exi);
4129 
4130         DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4131             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4132             COMMIT3args *, args);
4133 
4134         if (vp == NULL) {
4135                 error = ESTALE;
4136                 goto out;
4137         }
4138 
4139         ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
4140         ns = nfs3_get_srv();
4141         bva.va_mask = AT_ALL;
4142         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4143 
4144         /*
4145          * If we can't get the attributes, then we can't do the
4146          * right access checking.  So, we'll fail the request.
4147          */
4148         if (error)
4149                 goto out;
4150 
4151         bvap = &bva;
4152 
4153         if (rdonly(ro, vp)) {
4154                 resp->status = NFS3ERR_ROFS;
4155                 goto out1;
4156         }
4157 
4158         if (vp->v_type != VREG) {
4159                 resp->status = NFS3ERR_INVAL;
4160                 goto out1;
4161         }
4162 
4163         if (is_system_labeled()) {
4164                 bslabel_t *clabel = req->rq_label;
4165 
4166                 ASSERT(clabel != NULL);
4167                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4168                     "got client label from request(1)", struct svc_req *, req);
4169 
4170                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4171                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4172                             exi)) {
4173                                 resp->status = NFS3ERR_ACCES;
4174                                 goto out1;
4175                         }
4176                 }
4177         }
4178 
4179         if (crgetuid(cr) != bva.va_uid &&
4180             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4181                 goto out;
4182 
4183         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4184 
4185         ava.va_mask = AT_ALL;
4186         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4187 
4188         if (error)
4189                 goto out;
4190 
4191         resp->status = NFS3_OK;
4192         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4193         resp->resok.verf = ns->write3verf;
4194 
4195         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4196             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4197             COMMIT3res *, resp);
4198 
4199         VN_RELE(vp);
4200 
4201         return;
4202 
4203 out:
4204         if (curthread->t_flag & T_WOULDBLOCK) {
4205                 curthread->t_flag &= ~T_WOULDBLOCK;
4206                 resp->status = NFS3ERR_JUKEBOX;
4207         } else
4208                 resp->status = puterrno3(error);
4209 out1:
4210         DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4211             cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4212             COMMIT3res *, resp);
4213 
4214         if (vp != NULL)
4215                 VN_RELE(vp);
4216         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4217 }
4218 
4219 void *
4220 rfs3_commit_getfh(COMMIT3args *args)
4221 {
4222 
4223         return (&args->file);
4224 }
4225 
4226 static int
4227 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4228 {
4229 
4230         vap->va_mask = 0;
4231 
4232         if (sap->mode.set_it) {
4233                 vap->va_mode = (mode_t)sap->mode.mode;
4234                 vap->va_mask |= AT_MODE;
4235         }
4236         if (sap->uid.set_it) {
4237                 vap->va_uid = (uid_t)sap->uid.uid;
4238                 vap->va_mask |= AT_UID;
4239         }
4240         if (sap->gid.set_it) {
4241                 vap->va_gid = (gid_t)sap->gid.gid;
4242                 vap->va_mask |= AT_GID;
4243         }
4244         if (sap->size.set_it) {
4245                 if (sap->size.size > (size3)((u_longlong_t)-1))
4246                         return (EINVAL);
4247                 vap->va_size = sap->size.size;
4248                 vap->va_mask |= AT_SIZE;
4249         }
4250         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4251 #ifndef _LP64
4252                 /* check time validity */
4253                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4254                         return (EOVERFLOW);
4255 #endif
4256                 /*
4257                  * nfs protocol defines times as unsigned so don't extend sign,
4258                  * unless sysadmin set nfs_allow_preepoch_time.
4259                  */
4260                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4261                     sap->atime.atime.seconds);
4262                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4263                 vap->va_mask |= AT_ATIME;
4264         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4265                 gethrestime(&vap->va_atime);
4266                 vap->va_mask |= AT_ATIME;
4267         }
4268         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4269 #ifndef _LP64
4270                 /* check time validity */
4271                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4272                         return (EOVERFLOW);
4273 #endif
4274                 /*
4275                  * nfs protocol defines times as unsigned so don't extend sign,
4276                  * unless sysadmin set nfs_allow_preepoch_time.
4277                  */
4278                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4279                     sap->mtime.mtime.seconds);
4280                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4281                 vap->va_mask |= AT_MTIME;
4282         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4283                 gethrestime(&vap->va_mtime);
4284                 vap->va_mask |= AT_MTIME;
4285         }
4286 
4287         return (0);
4288 }
4289 
4290 static const ftype3 vt_to_nf3[] = {
4291         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4292 };
4293 
4294 static int
4295 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4296 {
4297 
4298         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4299         /* Return error if time or size overflow */
4300         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4301                 return (EOVERFLOW);
4302         }
4303         fap->type = vt_to_nf3[vap->va_type];
4304         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4305         fap->nlink = (uint32)vap->va_nlink;
4306         if (vap->va_uid == UID_NOBODY)
4307                 fap->uid = (uid3)NFS_UID_NOBODY;
4308         else
4309                 fap->uid = (uid3)vap->va_uid;
4310         if (vap->va_gid == GID_NOBODY)
4311                 fap->gid = (gid3)NFS_GID_NOBODY;
4312         else
4313                 fap->gid = (gid3)vap->va_gid;
4314         fap->size = (size3)vap->va_size;
4315         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4316         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4317         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4318         fap->fsid = (uint64)vap->va_fsid;
4319         fap->fileid = (fileid3)vap->va_nodeid;
4320         fap->atime.seconds = vap->va_atime.tv_sec;
4321         fap->atime.nseconds = vap->va_atime.tv_nsec;
4322         fap->mtime.seconds = vap->va_mtime.tv_sec;
4323         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4324         fap->ctime.seconds = vap->va_ctime.tv_sec;
4325         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4326         return (0);
4327 }
4328 
4329 static int
4330 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4331 {
4332 
4333         /* Return error if time or size overflow */
4334         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4335             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4336             NFS3_SIZE_OK(vap->va_size))) {
4337                 return (EOVERFLOW);
4338         }
4339         wccap->size = (size3)vap->va_size;
4340         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4341         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4342         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4343         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4344         return (0);
4345 }
4346 
4347 static void
4348 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4349 {
4350 
4351         /* don't return attrs if time overflow */
4352         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4353                 poap->attributes = TRUE;
4354         } else
4355                 poap->attributes = FALSE;
4356 }
4357 
4358 void
4359 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4360 {
4361 
4362         /* don't return attrs if time overflow */
4363         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4364                 poap->attributes = TRUE;
4365         } else
4366                 poap->attributes = FALSE;
4367 }
4368 
4369 static void
4370 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4371 {
4372         vattr_to_pre_op_attr(bvap, &wccp->before);
4373         vattr_to_post_op_attr(avap, &wccp->after);
4374 }
4375 
4376 static int
4377 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4378 {
4379         struct clist    *wcl;
4380         int             wlist_len;
4381         count3          count = rok->count;
4382 
4383         wcl = args->wlist;
4384         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4385                 return (FALSE);
4386 
4387         wcl = args->wlist;
4388         rok->wlist_len = wlist_len;
4389         rok->wlist = wcl;
4390         return (TRUE);
4391 }
4392 
4393 void
4394 rfs3_srv_zone_init(nfs_globals_t *ng)
4395 {
4396         nfs3_srv_t *ns;
4397         struct rfs3_verf_overlay {
4398                 uint_t id; /* a "unique" identifier */
4399                 int ts; /* a unique timestamp */
4400         } *verfp;
4401         timestruc_t now;
4402 
4403         ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4404 
4405         /*
4406          * The following algorithm attempts to find a unique verifier
4407          * to be used as the write verifier returned from the server
4408          * to the client.  It is important that this verifier change
4409          * whenever the server reboots.  Of secondary importance, it
4410          * is important for the verifier to be unique between two
4411          * different servers.
4412          *
4413          * Thus, an attempt is made to use the system hostid and the
4414          * current time in seconds when the nfssrv kernel module is
4415          * loaded.  It is assumed that an NFS server will not be able
4416          * to boot and then to reboot in less than a second.  If the
4417          * hostid has not been set, then the current high resolution
4418          * time is used.  This will ensure different verifiers each
4419          * time the server reboots and minimize the chances that two
4420          * different servers will have the same verifier.
4421          */
4422 
4423 #ifndef lint
4424         /*
4425          * We ASSERT that this constant logic expression is
4426          * always true because in the past, it wasn't.
4427          */
4428         ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4429 #endif
4430 
4431         gethrestime(&now);
4432         verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4433         verfp->ts = (int)now.tv_sec;
4434         verfp->id = zone_get_hostid(NULL);
4435 
4436         if (verfp->id == 0)
4437                 verfp->id = (uint_t)now.tv_nsec;
4438 
4439         ng->nfs3_srv = ns;
4440 }
4441 
4442 void
4443 rfs3_srv_zone_fini(nfs_globals_t *ng)
4444 {
4445         nfs3_srv_t *ns = ng->nfs3_srv;
4446 
4447         ng->nfs3_srv = NULL;
4448 
4449         kmem_free(ns, sizeof (*ns));
4450 }
4451 
4452 void
4453 rfs3_srvrinit(void)
4454 {
4455         nfs3_srv_caller_id = fs_new_caller_id();
4456 }
4457 
/*
 * Counterpart of rfs3_srvrinit().  There is currently nothing to tear
 * down, so this is intentionally empty.
 */
void
rfs3_srvrfini(void)
{
}