Print this page
NEX-16712 NFS dtrace providers do not support per-share filtering
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-9275 Got "bad mutex" panic when run IO to nfs share from clients
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-3524 CLONE - Port NEX-3505 "wrong authentication" messages with root=@0.0.0.0/0 set, result in loss of client access
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3533 CLONE - Port NEX-3019 NFSv3 writes underneath mounted filesystem to directory
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-3095 Issues related to NFS nohide
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-1128 NFS server: Generic uid and gid remapping for AUTH_SYS
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
OS-20 share_nfs(1m) charset handling is unreliable
OS-22 Page fault at nfscmd_dropped_entrysize+0x1e()
OS-23 NFSv2/3/4: READDIR responses are inconsistent when charset conversion fails
OS-24 rfs3_readdir(): Issues related to nfscmd_convdirent()
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
closes #12112 rb3823 - nfs-nohide: lookup("..") for submount should be correct
re #3541 rb11254 - nfs nohide - "nfssrv: need ability to go to submounts for v3 and v2 protocols"

@@ -18,18 +18,19 @@
  *
  * CDDL HEADER END
  */
 
 /*
- * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.
  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
 /* All Rights Reserved */
 
+
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/cred.h>
 #include <sys/buf.h>

@@ -66,17 +67,22 @@
 
 #include <inet/ip.h>
 #include <inet/ip6.h>
 
 /*
+ * Zone global variables of NFSv3 server
+ */
+typedef struct nfs3_srv {
+        writeverf3      write3verf;
+} nfs3_srv_t;
+
+/*
  * These are the interface routines for the server side of the
  * Network File System.  See the NFS version 3 protocol specification
  * for a description of this interface.
  */
 
-static writeverf3 write3verf;
-
 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);

@@ -83,10 +89,11 @@
 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
 
 extern int nfs_loaned_buffers;
 
 u_longlong_t nfs3_srv_caller_id;
+static zone_key_t rfs3_zone_key;
 
 /* ARGSUSED */
 void
 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
     struct svc_req *req, cred_t *cr, bool_t ro)

@@ -95,12 +102,13 @@
         vnode_t *vp;
         struct vattr va;
 
         vp = nfs3_fhtovp(&args->object, exi);
 
-        DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
+        DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            GETATTR3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -117,12 +125,13 @@
                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
                 if (error)
                         goto out;
                 resp->status = NFS3_OK;
 
-                DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
-                    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
+                DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
+                    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+                    GETATTR3res *, resp);
 
                 VN_RELE(vp);
 
                 return;
         }

@@ -132,21 +141,21 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 
-        DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
+        DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            GETATTR3res *, resp);
 
         if (vp != NULL)
                 VN_RELE(vp);
 }
 
 void *
 rfs3_getattr_getfh(GETATTR3args *args)
 {
-
         return (&args->object);
 }
 
 void
 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,

@@ -166,12 +175,13 @@
         bvap = NULL;
         avap = NULL;
 
         vp = nfs3_fhtovp(&args->object, exi);
 
-        DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
+        DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            SETATTR3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -328,12 +338,13 @@
                 nbl_end_crit(vp);
 
         resp->status = NFS3_OK;
         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 
-        DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
+        DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            SETATTR3res *, resp);
 
         VN_RELE(vp);
 
         return;
 

@@ -342,12 +353,13 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
+        DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            SETATTR3res *, resp);
 
         if (vp != NULL) {
                 if (in_crit)
                         nbl_end_crit(vp);
                 VN_RELE(vp);

@@ -356,11 +368,10 @@
 }
 
 void *
 rfs3_setattr_getfh(SETATTR3args *args)
 {
-
         return (&args->object);
 }
 
 /* ARGSUSED */
 void

@@ -380,25 +391,30 @@
         struct sockaddr *ca;
         char *name = NULL;
 
         dvap = NULL;
 
+        if (exi != NULL)
+                exi_hold(exi);
+
         /*
          * Allow lookups from the root - the default
          * location of the public filehandle.
          */
         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
-                dvp = rootdir;
+                dvp = ZONE_ROOTVP();
                 VN_HOLD(dvp);
 
-                DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
-                    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
+                DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
+                    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+                    LOOKUP3args *, args);
         } else {
                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 
-                DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
-                    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
+                DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
+                    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+                    LOOKUP3args *, args);
 
                 if (dvp == NULL) {
                         error = ESTALE;
                         goto out;
                 }

@@ -418,13 +434,24 @@
         }
 
         fhp = &args->what.dir;
         if (strcmp(args->what.name, "..") == 0 &&
             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
+                if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
+                    (dvp->v_flag & VROOT)) {
+                        /*
+                         * special case for ".." and 'nohide' exported root
+                         */
+                        if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
+                                resp->status = NFS3ERR_ACCES;
+                                goto out1;
+                        }
+                } else {
                 resp->status = NFS3ERR_NOENT;
                 goto out1;
         }
+        }
 
         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
         name = nfscmd_convname(ca, exi, args->what.name,
             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 

@@ -437,14 +464,16 @@
          * If the public filehandle is used then allow
          * a multi-component lookup
          */
         if (PUBLIC_FH3(&args->what.dir)) {
                 publicfh_flag = TRUE;
+
+                exi_rele(&exi);
+
                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
                     &exi, &sec);
-                if (error && exi != NULL)
-                        exi_rele(exi); /* See comment below Re: publicfh_flag */
+
                 /*
                  * Since WebNFS may bypass MOUNT, we need to ensure this
                  * request didn't come from an unlabeled admin_low client.
                  */
                 if (is_system_labeled() && error == 0) {

@@ -462,12 +491,10 @@
                         }
                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
                         if (tp == NULL || tp->tpc_tp.tp_doi !=
                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
                             SUN_CIPSO) {
-                                if (exi != NULL)
-                                        exi_rele(exi);
                                 VN_RELE(vp);
                                 error = EACCES;
                         }
                         if (tp != NULL)
                                 TPC_RELE(tp);

@@ -478,10 +505,16 @@
         }
 
         if (name != args->what.name)
                 kmem_free(name, MAXPATHLEN + 1);
 
+        if (error == 0 && vn_ismntpt(vp)) {
+                error = rfs_cross_mnt(&vp, &exi);
+                if (error)
+                        VN_RELE(vp);
+        }
+
         if (is_system_labeled() && error == 0) {
                 bslabel_t *clabel = req->rq_label;
 
                 ASSERT(clabel != NULL);
                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,

@@ -488,12 +521,10 @@
                     "got client label from request(1)", struct svc_req *, req);
 
                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
                         if (!do_rfs_label_check(clabel, dvp,
                             DOMINANCE_CHECK, exi)) {
-                                if (publicfh_flag && exi != NULL)
-                                        exi_rele(exi);
                                 VN_RELE(vp);
                                 error = EACCES;
                         }
                 }
         }

@@ -510,27 +541,19 @@
                 error = makefh3(&resp->resok.object, vp, exi);
                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
                         auth_weak = TRUE;
         }
 
-        /*
-         * If publicfh_flag is true then we have called rfs_publicfh_mclookup
-         * and have obtained a new exportinfo in exi which needs to be
-         * released. Note that the original exportinfo pointed to by exi
-         * will be released by the caller, common_dispatch.
-         */
-        if (publicfh_flag)
-                exi_rele(exi);
-
         if (error) {
                 VN_RELE(vp);
                 goto out;
         }
 
         va.va_mask = AT_ALL;
         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 
+        exi_rele(&exi);
         VN_RELE(vp);
 
         resp->status = NFS3_OK;
         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);

@@ -541,12 +564,13 @@
          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
          */
         if (auth_weak)
                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 
-        DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
+        DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            LOOKUP3res *, resp);
         VN_RELE(dvp);
 
         return;
 
 out:

@@ -554,27 +578,29 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
+        if (exi != NULL)
+                exi_rele(&exi);
 
+        DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            LOOKUP3res *, resp);
+
         if (dvp != NULL)
                 VN_RELE(dvp);
         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 
 }
 
 void *
 rfs3_lookup_getfh(LOOKUP3args *args)
 {
-
         return (&args->what.dir);
 }
 
-/* ARGSUSED */
 void
 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
     struct svc_req *req, cred_t *cr, bool_t ro)
 {
         int error;

@@ -588,12 +614,13 @@
 
         vap = NULL;
 
         vp = nfs3_fhtovp(&args->object, exi);
 
-        DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
+        DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            ACCESS3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -699,12 +726,13 @@
         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 
         resp->status = NFS3_OK;
         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 
-        DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
+        DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            ACCESS3res *, resp);
 
         VN_RELE(vp);
 
         return;
 

@@ -712,21 +740,21 @@
         if (curthread->t_flag & T_WOULDBLOCK) {
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
-        DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
+        DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            ACCESS3res *, resp);
         if (vp != NULL)
                 VN_RELE(vp);
         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 }
 
 void *
 rfs3_access_getfh(ACCESS3args *args)
 {
-
         return (&args->object);
 }
 
 /* ARGSUSED */
 void

@@ -746,12 +774,13 @@
 
         vap = NULL;
 
         vp = nfs3_fhtovp(&args->symlink, exi);
 
-        DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
+        DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READLINK3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -867,12 +896,13 @@
 
         resp->status = NFS3_OK;
         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
         resp->resok.data = name;
 
-        DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
+        DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READLINK3res *, resp);
         VN_RELE(vp);
 
         if (name != data)
                 kmem_free(data, MAXPATHLEN + 1);
 

@@ -883,28 +913,27 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
+        DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READLINK3res *, resp);
         if (vp != NULL)
                 VN_RELE(vp);
         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 }
 
 void *
 rfs3_readlink_getfh(READLINK3args *args)
 {
-
         return (&args->symlink);
 }
 
 void
 rfs3_readlink_free(READLINK3res *resp)
 {
-
         if (resp->status == NFS3_OK)
                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 }
 
 /*

@@ -934,13 +963,15 @@
 
         vap = NULL;
 
         vp = nfs3_fhtovp(&args->file, exi);
 
-        DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READ3args *, args);
+        DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READ3args *, args);
 
+
         if (vp == NULL) {
                 error = ESTALE;
                 goto out;
         }
 

@@ -1190,12 +1221,13 @@
                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
                 (resp->resok).wlist = NULL;
         }
 
 done:
-        DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READ3res *, resp);
+        DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READ3res *, resp);
 
         VN_RELE(vp);
 
         if (iovp != NULL)
                 kmem_free(iovp, iovcnt * sizeof (struct iovec));

@@ -1207,12 +1239,13 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READ3res *, resp);
+        DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READ3res *, resp);
 
         if (vp != NULL) {
                 if (need_rwunlock)
                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
                 if (in_crit)

@@ -1238,11 +1271,10 @@
 }
 
 void *
 rfs3_read_getfh(READ3args *args)
 {
-
         return (&args->file);
 }
 
 #define MAX_IOVECS      12
 

@@ -1253,10 +1285,11 @@
 
 void
 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
     struct svc_req *req, cred_t *cr, bool_t ro)
 {
+        nfs3_srv_t *ns;
         int error;
         vnode_t *vp;
         struct vattr *bvap = NULL;
         struct vattr bva;
         struct vattr *avap = NULL;

@@ -1273,18 +1306,20 @@
         int rwlock_ret = -1;
         caller_context_t ct;
 
         vp = nfs3_fhtovp(&args->file, exi);
 
-        DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
+        DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            WRITE3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto err;
         }
 
+        ns = zone_getspecific(rfs3_zone_key, curzone);
         if (is_system_labeled()) {
                 bslabel_t *clabel = req->rq_label;
 
                 ASSERT(clabel != NULL);
                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,

@@ -1368,11 +1403,11 @@
         if (args->count == 0) {
                 resp->status = NFS3_OK;
                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
                 resp->resok.count = 0;
                 resp->resok.committed = args->stable;
-                resp->resok.verf = write3verf;
+                resp->resok.verf = ns->write3verf;
                 goto out;
         }
 
         if (args->mblk != NULL) {
                 iovcnt = 0;

@@ -1470,11 +1505,11 @@
 
         resp->status = NFS3_OK;
         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
         resp->resok.count = args->count - uio.uio_resid;
         resp->resok.committed = args->stable;
-        resp->resok.verf = write3verf;
+        resp->resok.verf = ns->write3verf;
         goto out;
 
 err:
         if (curthread->t_flag & T_WOULDBLOCK) {
                 curthread->t_flag &= ~T_WOULDBLOCK;

@@ -1482,12 +1517,13 @@
         } else
                 resp->status = puterrno3(error);
 err1:
         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
 out:
-        DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
+        DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            WRITE3res *, resp);
 
         if (vp != NULL) {
                 if (rwlock_ret != -1)
                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
                 if (in_crit)

@@ -1497,11 +1533,10 @@
 }
 
 void *
 rfs3_write_getfh(WRITE3args *args)
 {
-
         return (&args->file);
 }
 
 void
 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,

@@ -1528,12 +1563,13 @@
         dbvap = NULL;
         davap = NULL;
 
         dvp = nfs3_fhtovp(&args->where.dir, exi);
 
-        DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
+        DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            CREATE3args *, args);
 
         if (dvp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -1555,10 +1591,15 @@
         if (rdonly(ro, dvp)) {
                 resp->status = NFS3ERR_ROFS;
                 goto out1;
         }
 
+        if (protect_zfs_mntpt(dvp) != 0) {
+                resp->status = NFS3ERR_ACCES;
+                goto out1;
+        }
+
         if (is_system_labeled()) {
                 bslabel_t *clabel = req->rq_label;
 
                 ASSERT(clabel != NULL);
                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,

@@ -1828,12 +1869,13 @@
 
         resp->status = NFS3_OK;
         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
 
-        DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
+        DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            CREATE3res *, resp);
 
         VN_RELE(dvp);
         return;
 
 out:

@@ -1841,12 +1883,13 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
+        DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            CREATE3res *, resp);
 
         if (name != NULL && name != args->where.name)
                 kmem_free(name, MAXPATHLEN + 1);
 
         if (tvp != NULL) {

@@ -1860,11 +1903,10 @@
 }
 
 void *
 rfs3_create_getfh(CREATE3args *args)
 {
-
         return (&args->where.dir);
 }
 
 void
 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,

@@ -1885,12 +1927,13 @@
         dbvap = NULL;
         davap = NULL;
 
         dvp = nfs3_fhtovp(&args->where.dir, exi);
 
-        DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
+        DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            MKDIR3args *, args);
 
         if (dvp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -1912,10 +1955,15 @@
         if (rdonly(ro, dvp)) {
                 resp->status = NFS3ERR_ROFS;
                 goto out1;
         }
 
+        if (protect_zfs_mntpt(dvp) != 0) {
+                resp->status = NFS3ERR_ACCES;
+                goto out1;
+        }
+
         if (is_system_labeled()) {
                 bslabel_t *clabel = req->rq_label;
 
                 ASSERT(clabel != NULL);
                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,

@@ -1985,12 +2033,13 @@
 
         resp->status = NFS3_OK;
         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
 
-        DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
+        DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            MKDIR3res *, resp);
         VN_RELE(dvp);
 
         return;
 
 out:

@@ -1998,21 +2047,21 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
+        DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            MKDIR3res *, resp);
         if (dvp != NULL)
                 VN_RELE(dvp);
         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
 }
 
 void *
 rfs3_mkdir_getfh(MKDIR3args *args)
 {
-
         return (&args->where.dir);
 }
 
 void
 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,

@@ -2034,12 +2083,13 @@
         dbvap = NULL;
         davap = NULL;
 
         dvp = nfs3_fhtovp(&args->where.dir, exi);
 
-        DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
+        DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            SYMLINK3args *, args);
 
         if (dvp == NULL) {
                 error = ESTALE;
                 goto err;
         }

@@ -2061,10 +2111,15 @@
         if (rdonly(ro, dvp)) {
                 resp->status = NFS3ERR_ROFS;
                 goto err1;
         }
 
+        if (protect_zfs_mntpt(dvp) != 0) {
+                resp->status = NFS3ERR_ACCES;
+                goto err1;
+        }
+
         if (is_system_labeled()) {
                 bslabel_t *clabel = req->rq_label;
 
                 ASSERT(clabel != NULL);
                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,

@@ -2172,21 +2227,21 @@
         if (name != NULL && name != args->where.name)
                 kmem_free(name, MAXPATHLEN + 1);
         if (symdata != NULL && symdata != args->symlink.symlink_data)
                 kmem_free(symdata, MAXPATHLEN + 1);
 
-        DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
+        DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            SYMLINK3res *, resp);
 
         if (dvp != NULL)
                 VN_RELE(dvp);
 }
 
 void *
 rfs3_symlink_getfh(SYMLINK3args *args)
 {
-
         return (&args->where.dir);
 }
 
 void
 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,

@@ -2210,12 +2265,13 @@
         dbvap = NULL;
         davap = NULL;
 
         dvp = nfs3_fhtovp(&args->where.dir, exi);
 
-        DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
+        DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            MKNOD3args *, args);
 
         if (dvp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -2237,10 +2293,15 @@
         if (rdonly(ro, dvp)) {
                 resp->status = NFS3ERR_ROFS;
                 goto out1;
         }
 
+        if (protect_zfs_mntpt(dvp) != 0) {
+                resp->status = NFS3ERR_ACCES;
+                goto out1;
+        }
+
         if (is_system_labeled()) {
                 bslabel_t *clabel = req->rq_label;
 
                 ASSERT(clabel != NULL);
                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,

@@ -2357,12 +2418,13 @@
 
         VN_RELE(vp);
 
         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
-        DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
+        DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            MKNOD3res *, resp);
         VN_RELE(dvp);
         return;
 
 out:
         if (curthread->t_flag & T_WOULDBLOCK) {

@@ -2369,21 +2431,21 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
+        DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+            MKNOD3res *, resp);
         if (dvp != NULL)
                 VN_RELE(dvp);
         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
 }
 
 void *
 rfs3_mknod_getfh(MKNOD3args *args)
 {
-
         return (&args->where.dir);
 }
 
 void
 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,

@@ -2402,12 +2464,13 @@
         bvap = NULL;
         avap = NULL;
 
         vp = nfs3_fhtovp(&args->object.dir, exi);
 
-        DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
+        DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            REMOVE3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto err;
         }

@@ -2511,12 +2574,13 @@
         } else
                 resp->status = puterrno3(error);
 err1:
         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
 out:
-        DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
+        DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            REMOVE3res *, resp);
 
         if (name != NULL && name != args->object.name)
                 kmem_free(name, MAXPATHLEN + 1);
 
         if (vp != NULL)

@@ -2524,11 +2588,10 @@
 }
 
 void *
 rfs3_remove_getfh(REMOVE3args *args)
 {
-
         return (&args->object.dir);
 }
 
 void
 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,

@@ -2546,12 +2609,13 @@
         bvap = NULL;
         avap = NULL;
 
         vp = nfs3_fhtovp(&args->object.dir, exi);
 
-        DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
+        DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            RMDIR3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto err;
         }

@@ -2603,11 +2667,11 @@
         if (name == NULL) {
                 resp->status = NFS3ERR_INVAL;
                 goto err1;
         }
 
-        error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
+        error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
 
         if (name != args->object.name)
                 kmem_free(name, MAXPATHLEN + 1);
 
         ava.va_mask = AT_ALL;

@@ -2641,21 +2705,21 @@
         } else
                 resp->status = puterrno3(error);
 err1:
         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
 out:
-        DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
+        DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            RMDIR3res *, resp);
         if (vp != NULL)
                 VN_RELE(vp);
 
 }
 
 void *
 rfs3_rmdir_getfh(RMDIR3args *args)
 {
-
         return (&args->object.dir);
 }
 
 void
 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,

@@ -2687,12 +2751,13 @@
         tavap = NULL;
         tvp = NULL;
 
         fvp = nfs3_fhtovp(&args->from.dir, exi);
 
-        DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
+        DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
+            RENAME3args *, args);
 
         if (fvp == NULL) {
                 error = ESTALE;
                 goto err;
         }

@@ -2720,11 +2785,11 @@
         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
         if (to_exi == NULL) {
                 resp->status = NFS3ERR_ACCES;
                 goto err1;
         }
-        exi_rele(to_exi);
+        exi_rele(&to_exi);
 
         if (to_exi != exi) {
                 resp->status = NFS3ERR_XDEV;
                 goto err1;
         }

@@ -2758,10 +2823,15 @@
         if (rdonly(ro, tvp)) {
                 resp->status = NFS3ERR_ROFS;
                 goto err1;
         }
 
+        if (protect_zfs_mntpt(tvp) != 0) {
+                resp->status = NFS3ERR_ACCES;
+                goto err1;
+        }
+
         if (is_system_labeled()) {
                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
                             exi)) {
                                 resp->status = NFS3ERR_ACCES;

@@ -2805,14 +2875,14 @@
                 resp->status = NFS3ERR_JUKEBOX;
                 goto err1;
         }
 
         /*
-         * Check for renaming over a delegated file.  Check rfs4_deleg_policy
+         * Check for renaming over a delegated file.  Check nfs4_deleg_policy
          * first to avoid VOP_LOOKUP if possible.
          */
-        if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
+        if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
             NULL, NULL, NULL) == 0) {
 
                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
                         VN_RELE(targvp);

@@ -2872,22 +2942,22 @@
         if (name != NULL && name != args->from.name)
                 kmem_free(name, MAXPATHLEN + 1);
         if (toname != NULL && toname != args->to.name)
                 kmem_free(toname, MAXPATHLEN + 1);
 
-        DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
+        DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
+            RENAME3res *, resp);
         if (fvp != NULL)
                 VN_RELE(fvp);
         if (tvp != NULL)
                 VN_RELE(tvp);
 }
 
 void *
 rfs3_rename_getfh(RENAME3args *args)
 {
-
         return (&args->from.dir);
 }
 
 void
 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,

@@ -2913,12 +2983,13 @@
         avap = NULL;
         dvp = NULL;
 
         vp = nfs3_fhtovp(&args->file, exi);
 
-        DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, LINK3args *, args);
+        DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            LINK3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -2930,11 +3001,11 @@
         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
         if (to_exi == NULL) {
                 resp->status = NFS3ERR_ACCES;
                 goto out1;
         }
-        exi_rele(to_exi);
+        exi_rele(&to_exi);
 
         if (to_exi != exi) {
                 resp->status = NFS3ERR_XDEV;
                 goto out1;
         }

@@ -2982,10 +3053,15 @@
         if (rdonly(ro, dvp)) {
                 resp->status = NFS3ERR_ROFS;
                 goto out1;
         }
 
+        if (protect_zfs_mntpt(dvp) != 0) {
+                resp->status = NFS3ERR_ACCES;
+                goto out1;
+        }
+
         if (is_system_labeled()) {
                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
                     "got client label from request(1)", struct svc_req *, req);
 
                 if (!blequal(&l_admin_low->tsl_label, clabel)) {

@@ -3026,12 +3102,13 @@
 
         resp->status = NFS3_OK;
         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
 
-        DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
+        DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            LINK3res *, resp);
 
         VN_RELE(vp);
 
         return;
 

@@ -3043,12 +3120,13 @@
                 resp->status = puterrno3(error);
 out1:
         if (name != NULL && name != args->link.name)
                 kmem_free(name, MAXPATHLEN + 1);
 
-        DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
+        DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            LINK3res *, resp);
 
         if (vp != NULL)
                 VN_RELE(vp);
         if (dvp != NULL)
                 VN_RELE(dvp);

@@ -3057,43 +3135,18 @@
 }
 
 void *
 rfs3_link_getfh(LINK3args *args)
 {
-
         return (&args->file);
 }
 
-/*
- * This macro defines the size of a response which contains attribute
- * information and one directory entry (whose length is specified by
- * the macro parameter).  If the incoming request is larger than this,
- * then we are guaranteed to be able to return at one directory entry
- * if one exists.  Therefore, we do not need to check for
- * NFS3ERR_TOOSMALL if the requested size is larger then this.  If it
- * is not, then we need to check to make sure that this error does not
- * need to be returned.
- *
- * NFS3_READDIR_MIN_COUNT is comprised of following :
- *
- * status - 1 * BYTES_PER_XDR_UNIT
- * attr. flag - 1 * BYTES_PER_XDR_UNIT
- * cookie verifier - 2 * BYTES_PER_XDR_UNIT
- * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
- * boolean - 1 * BYTES_PER_XDR_UNIT
- * file id - 2 * BYTES_PER_XDR_UNIT
- * directory name length - 1 * BYTES_PER_XDR_UNIT
- * cookie - 2 * BYTES_PER_XDR_UNIT
- * end of list - 1 * BYTES_PER_XDR_UNIT
- * end of file - 1 * BYTES_PER_XDR_UNIT
- * Name length of directory to the nearest byte
- */
+#ifdef nextdp
+#undef nextdp
+#endif
+#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 
-#define NFS3_READDIR_MIN_COUNT(length)  \
-        ((1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
-                BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT))
-
 /* ARGSUSED */
 void
 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
     struct svc_req *req, cred_t *cr, bool_t ro)
 {

@@ -3101,29 +3154,41 @@
         vnode_t *vp;
         struct vattr *vap;
         struct vattr va;
         struct iovec iov;
         struct uio uio;
-        char *data;
         int iseof;
-        int bufsize;
-        int namlen;
-        uint_t count;
-        struct sockaddr *ca;
 
-        vap = NULL;
+        count3 count = args->count;
+        count3 size;            /* size of the READDIR3resok structure */
 
+        size_t datasz;
+        char *data = NULL;
+        dirent64_t *dp;
+
+        struct sockaddr *ca;
+        entry3 **eptr;
+        entry3 *entry;
+
         vp = nfs3_fhtovp(&args->dir, exi);
 
-        DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
+        DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READDIR3args *, args);
 
         if (vp == NULL) {
-                error = ESTALE;
-                goto out;
+                resp->status = NFS3ERR_STALE;
+                vap = NULL;
+                goto out1;
         }
 
+        if (vp->v_type != VDIR) {
+                resp->status = NFS3ERR_NOTDIR;
+                vap = NULL;
+                goto out1;
+        }
+
         if (is_system_labeled()) {
                 bslabel_t *clabel = req->rq_label;
 
                 ASSERT(clabel != NULL);
                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,

@@ -3131,10 +3196,11 @@
 
                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
                             exi)) {
                                 resp->status = NFS3ERR_ACCES;
+                                vap = NULL;
                                 goto out1;
                         }
                 }
         }
 

@@ -3141,123 +3207,152 @@
         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
 
         va.va_mask = AT_ALL;
         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 
-        if (vp->v_type != VDIR) {
-                resp->status = NFS3ERR_NOTDIR;
-                goto out1;
-        }
-
         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
         if (error)
                 goto out;
 
         /*
-         * Now don't allow arbitrary count to alloc;
-         * allow the maximum not to exceed rfs3_tsize()
+         * Don't allow arbitrary counts for allocation
          */
-        if (args->count > rfs3_tsize(req))
-                args->count = rfs3_tsize(req);
+        if (count > rfs3_tsize(req))
+                count = rfs3_tsize(req);
 
         /*
+         * struct READDIR3resok:
+         *   dir_attributes:    1 + NFS3_SIZEOF_FATTR3
+         *   cookieverf:        2
+         *   entries (bool):    1
+         *   eof:               1
+         */
+        size = (1 + NFS3_SIZEOF_FATTR3 + 2 + 1 + 1) * BYTES_PER_XDR_UNIT;
+
+        if (size > count) {
+                resp->status = NFS3ERR_TOOSMALL;
+                goto out1;
+        }
+
+        /*
+         * This is a simplification.  The dirent64_t size is not the same as
+         * the size of the XDR representation of entry3, but the sizes are
+         * similar so we'll assume they are the same.  This assumption should
+         * not cause any harm; at worst we call VOP_READDIR() once more.
+         */
+        datasz = count;
+
+        /*
          * Make sure that there is room to read at least one entry
          * if any are available.
          */
-        if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
-                count = DIRENT64_RECLEN(MAXNAMELEN);
-        else
-                count = args->count;
+        if (datasz < DIRENT64_RECLEN(MAXNAMELEN))
+                datasz = DIRENT64_RECLEN(MAXNAMELEN);
 
-        data = kmem_alloc(count, KM_SLEEP);
+        data = kmem_alloc(datasz, KM_NOSLEEP);
+        if (data == NULL) {
+                /* The allocation failed; downsize and wait for it this time */
+                if (datasz > MAXBSIZE)
+                        datasz = MAXBSIZE;
+                data = kmem_alloc(datasz, KM_SLEEP);
+        }
 
-        iov.iov_base = data;
-        iov.iov_len = count;
         uio.uio_iov = &iov;
         uio.uio_iovcnt = 1;
         uio.uio_segflg = UIO_SYSSPACE;
         uio.uio_extflg = UIO_COPY_CACHED;
         uio.uio_loffset = (offset_t)args->cookie;
-        uio.uio_resid = count;
+        uio.uio_resid = datasz;
 
-        error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
+        ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
+        eptr = &resp->resok.reply.entries;
+        entry = NULL;
 
-        va.va_mask = AT_ALL;
-        vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
+getmoredents:
+        iov.iov_base = data;
+        iov.iov_len = datasz;
 
+        error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
         if (error) {
-                kmem_free(data, count);
-                goto out;
+                iseof = 0;
+                goto done;
         }
 
-        /*
-         * If the count was not large enough to be able to guarantee
-         * to be able to return at least one entry, then need to
-         * check to see if NFS3ERR_TOOSMALL should be returned.
-         */
-        if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
-                /*
-                 * bufsize is used to keep track of the size of the response.
-                 * It is primed with:
-                 *      1 for the status +
-                 *      1 for the dir_attributes.attributes boolean +
-                 *      2 for the cookie verifier
-                 * all times BYTES_PER_XDR_UNIT to convert from XDR units
-                 * to bytes.  If there are directory attributes to be
-                 * returned, then:
-                 *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
-                 * time BYTES_PER_XDR_UNIT is added to account for them.
-                 */
-                bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
-                if (vap != NULL)
-                        bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
-                /*
-                 * An entry is composed of:
-                 *      1 for the true/false list indicator +
-                 *      2 for the fileid +
-                 *      1 for the length of the name +
-                 *      2 for the cookie +
-                 * all times BYTES_PER_XDR_UNIT to convert from
-                 * XDR units to bytes, plus the length of the name
-                 * rounded up to the nearest BYTES_PER_XDR_UNIT.
-                 */
-                if (count != uio.uio_resid) {
-                        namlen = strlen(((struct dirent64 *)data)->d_name);
-                        bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
-                            roundup(namlen, BYTES_PER_XDR_UNIT);
+        if (iov.iov_len == datasz)
+                goto done;
+
+        for (dp = (dirent64_t *)data; (char *)dp - data < datasz - iov.iov_len;
+            dp = nextdp(dp)) {
+                char *name;
+                count3 esize;
+
+                if (dp->d_ino == 0) {
+                        if (entry != NULL)
+                                entry->cookie = (cookie3)dp->d_off;
+                        continue;
                 }
+
+                name = nfscmd_convname(ca, exi, dp->d_name,
+                    NFSCMD_CONV_OUTBOUND, MAXPATHLEN + 1);
+                if (name == NULL) {
+                        if (entry != NULL)
+                                entry->cookie = (cookie3)dp->d_off;
+                        continue;
+                }
+
                 /*
-                 * We need to check to see if the number of bytes left
-                 * to go into the buffer will actually fit into the
-                 * buffer.  This is calculated as the size of this
-                 * entry plus:
-                 *      1 for the true/false list indicator +
-                 *      1 for the eof indicator
-                 * times BYTES_PER_XDR_UNIT to convert from from
-                 * XDR units to bytes.
+                 * struct entry3:
+                 *   fileid:            2
+                 *   name (length):     1
+                 *   name (data):       length (rounded up)
+                 *   cookie:            2
+                 *   nextentry (bool):  1
                  */
-                bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
-                if (bufsize > args->count) {
-                        kmem_free(data, count);
-                        resp->status = NFS3ERR_TOOSMALL;
-                        goto out1;
+                esize = (2 + 1 + 2 + 1) * BYTES_PER_XDR_UNIT +
+                    RNDUP(strlen(name));
+
+                /* If the new entry does not fit, discard it */
+                if (esize > count - size) {
+                        if (name != dp->d_name)
+                                kmem_free(name, MAXPATHLEN + 1);
+                        iseof = 0;
+                        goto done;
                 }
+
+                entry = kmem_alloc(sizeof (entry3), KM_SLEEP);
+
+                entry->fileid = (fileid3)dp->d_ino;
+                entry->name = strdup(name);
+                if (name != dp->d_name)
+                        kmem_free(name, MAXPATHLEN + 1);
+                entry->cookie = (cookie3)dp->d_off;
+
+                size += esize;
+
+                /* Add the entry to the linked list */
+                *eptr = entry;
+                eptr = &entry->nextentry;
         }
 
-        /*
-         * Have a valid readir buffer for the native character
-         * set. Need to check if a conversion is necessary and
-         * potentially rewrite the whole buffer. Note that if the
-         * conversion expands names enough, the structure may not
-         * fit. In this case, we need to drop entries until if fits
-         * and patch the counts in order that the next readdir will
-         * get the correct entries.
-         */
-        ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
-        data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
+        if (!iseof && size < count) {
+                uio.uio_resid = MIN(datasz, MAXBSIZE);
+                goto getmoredents;
+        }
 
+done:
+        *eptr = NULL;
 
+        va.va_mask = AT_ALL;
+        vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
+
+        if (!iseof && resp->resok.reply.entries == NULL) {
+                if (error)
+                        goto out;
+                resp->status = NFS3ERR_TOOSMALL;
+                goto out1;
+        }
+
         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
 
 #if 0 /* notyet */
         /*
          * Don't do this.  It causes local disk writes when just

@@ -3269,88 +3364,74 @@
          */
         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 #endif
 
         resp->status = NFS3_OK;
-        vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
         resp->resok.cookieverf = 0;
-        resp->resok.reply.entries = (entry3 *)data;
-        resp->resok.reply.eof = iseof;
-        resp->resok.size = count - uio.uio_resid;
-        resp->resok.count = args->count;
-        resp->resok.freecount = count;
+        resp->resok.reply.eof = iseof ? TRUE : FALSE;
 
-        DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
+        vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
 
+        DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READDIR3res *, resp);
+
         VN_RELE(vp);
 
+        if (data != NULL)
+                kmem_free(data, datasz);
+
         return;
 
 out:
         if (curthread->t_flag & T_WOULDBLOCK) {
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
+        vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
 
+        DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READDIR3res *, resp);
+
         if (vp != NULL) {
                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
                 VN_RELE(vp);
         }
-        vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
+
+        if (data != NULL)
+                kmem_free(data, datasz);
 }
 
 void *
 rfs3_readdir_getfh(READDIR3args *args)
 {
-
         return (&args->dir);
 }
 
 void
 rfs3_readdir_free(READDIR3res *resp)
 {
+        if (resp->status == NFS3_OK) {
+                entry3 *entry, *nentry;
 
-        if (resp->status == NFS3_OK)
-                kmem_free(resp->resok.reply.entries, resp->resok.freecount);
+                for (entry = resp->resok.reply.entries; entry != NULL;
+                    entry = nentry) {
+                        nentry = entry->nextentry;
+                        strfree(entry->name);
+                        kmem_free(entry, sizeof (entry3));
+                }
+        }
 }
 
 #ifdef nextdp
 #undef nextdp
 #endif
 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 
-/*
- * This macro computes the size of a response which contains
- * one directory entry including the attributes as well as file handle.
- * If the incoming request is larger than this, then we are guaranteed to be
- * able to return at least one more directory entry if one exists.
- *
- * NFS3_READDIRPLUS_ENTRY is made up of the following:
- *
- * boolean - 1 * BYTES_PER_XDR_UNIT
- * file id - 2 * BYTES_PER_XDR_UNIT
- * directory name length - 1 * BYTES_PER_XDR_UNIT
- * cookie - 2 * BYTES_PER_XDR_UNIT
- * attribute flag - 1 * BYTES_PER_XDR_UNIT
- * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
- * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
- * length of a file handle - 1 * BYTES_PER_XDR_UNIT
- * Maximum length of a file handle (NFS3_MAXFHSIZE)
- * name length of the entry to the nearest bytes
- */
-#define NFS3_READDIRPLUS_ENTRY(namelen) \
-        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
-                BYTES_PER_XDR_UNIT + \
-        NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT))
-
-static int rfs3_readdir_unit = MAXBSIZE;
-
 /* ARGSUSED */
 void
 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 {

@@ -3358,43 +3439,43 @@
         vnode_t *vp;
         struct vattr *vap;
         struct vattr va;
         struct iovec iov;
         struct uio uio;
-        char *data;
         int iseof;
-        struct dirent64 *dp;
-        vnode_t *nvp;
-        struct vattr *nvap;
-        struct vattr nva;
-        entryplus3_info *infop = NULL;
-        int size = 0;
-        int nents = 0;
-        int bufsize = 0;
-        int entrysize = 0;
-        int tofit = 0;
-        int rd_unit = rfs3_readdir_unit;
-        int prev_len;
-        int space_left;
-        int i;
-        uint_t *namlen = NULL;
-        char *ndata = NULL;
-        struct sockaddr *ca;
-        size_t ret;
 
-        vap = NULL;
+        count3 dircount = args->dircount;
+        count3 maxcount = args->maxcount;
+        count3 dirsize = 0;
+        count3 size;            /* size of the READDIRPLUS3resok structure */
 
+        size_t datasz;
+        char *data = NULL;
+        dirent64_t *dp;
+
+        struct sockaddr *ca;
+        entryplus3 **eptr;
+        entryplus3 *entry;
+
         vp = nfs3_fhtovp(&args->dir, exi);
 
-        DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
+        DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READDIRPLUS3args *, args);
 
         if (vp == NULL) {
-                error = ESTALE;
-                goto out;
+                resp->status = NFS3ERR_STALE;
+                vap = NULL;
+                goto out1;
         }
 
+        if (vp->v_type != VDIR) {
+                resp->status = NFS3ERR_NOTDIR;
+                vap = NULL;
+                goto out1;
+        }
+
         if (is_system_labeled()) {
                 bslabel_t *clabel = req->rq_label;
 
                 ASSERT(clabel != NULL);
                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,

@@ -3403,10 +3484,11 @@
 
                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
                             exi)) {
                                 resp->status = NFS3ERR_ACCES;
+                                vap = NULL;
                                 goto out1;
                         }
                 }
         }
 

@@ -3413,235 +3495,225 @@
         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
 
         va.va_mask = AT_ALL;
         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 
-        if (vp->v_type != VDIR) {
-                error = ENOTDIR;
-                goto out;
-        }
-
         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
         if (error)
                 goto out;
 
         /*
          * Don't allow arbitrary counts for allocation
          */
-        if (args->maxcount > rfs3_tsize(req))
-                args->maxcount = rfs3_tsize(req);
+        if (maxcount > rfs3_tsize(req))
+                maxcount = rfs3_tsize(req);
 
         /*
-         * Make sure that there is room to read at least one entry
-         * if any are available
+         * struct READDIRPLUS3resok:
+         *   dir_attributes:    1 + NFS3_SIZEOF_FATTR3
+         *   cookieverf:        2
+         *   entries (bool):    1
+         *   eof:               1
          */
-        args->dircount = MIN(args->dircount, args->maxcount);
+        size = (1 + NFS3_SIZEOF_FATTR3 + 2 + 1 + 1) * BYTES_PER_XDR_UNIT;
 
-        if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
-                args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
+        if (size > maxcount) {
+                resp->status = NFS3ERR_TOOSMALL;
+                goto out1;
+        }
 
         /*
-         * This allocation relies on a minimum directory entry
-         * being roughly 24 bytes.  Therefore, the namlen array
-         * will have enough space based on the maximum number of
-         * entries to read.
+         * This is a simplification.  The dirent64_t size is not the same as
+         * the size of the XDR representation of entryplus3 (excluding
+         * attributes and handle), but the sizes are similar so we'll assume
+         * they are the same.  This assumption should not cause any harm.  In
+         * the worst case we will need to issue VOP_READDIR() once more.
          */
-        namlen = kmem_alloc(args->dircount, KM_SLEEP);
 
-        space_left = args->dircount;
-        data = kmem_alloc(args->dircount, KM_SLEEP);
-        dp = (struct dirent64 *)data;
+        datasz = MIN(dircount, maxcount);
+
+        /*
+         * Make sure that there is room to read at least one entry
+         * if any are available.
+         */
+        if (datasz < DIRENT64_RECLEN(MAXNAMELEN))
+                datasz = DIRENT64_RECLEN(MAXNAMELEN);
+
+        data = kmem_alloc(datasz, KM_NOSLEEP);
+        if (data == NULL) {
+                /* The allocation failed; downsize and wait for it this time */
+                if (datasz > MAXBSIZE)
+                        datasz = MAXBSIZE;
+                data = kmem_alloc(datasz, KM_SLEEP);
+        }
+
         uio.uio_iov = &iov;
         uio.uio_iovcnt = 1;
         uio.uio_segflg = UIO_SYSSPACE;
         uio.uio_extflg = UIO_COPY_CACHED;
         uio.uio_loffset = (offset_t)args->cookie;
+        uio.uio_resid = datasz;
 
-        /*
-         * bufsize is used to keep track of the size of the response as we
-         * get post op attributes and filehandles for each entry.  This is
-         * an optimization as the server may have read more entries than will
-         * fit in the buffer specified by maxcount.  We stop calculating
-         * post op attributes and filehandles once we have exceeded maxcount.
-         * This will minimize the effect of truncation.
-         *
-         * It is primed with:
-         *      1 for the status +
-         *      1 for the dir_attributes.attributes boolean +
-         *      2 for the cookie verifier
-         * all times BYTES_PER_XDR_UNIT to convert from XDR units
-         * to bytes.  If there are directory attributes to be
-         * returned, then:
-         *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
-         * time BYTES_PER_XDR_UNIT is added to account for them.
-         */
-        bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
-        if (vap != NULL)
-                bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
+        ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
+        eptr = &resp->resok.reply.entries;
+        entry = NULL;
 
 getmoredents:
-        /*
-         * Here we make a check so that our read unit is not larger than
-         * the space left in the buffer.
-         */
-        rd_unit = MIN(rd_unit, space_left);
-        iov.iov_base = (char *)dp;
-        iov.iov_len = rd_unit;
-        uio.uio_resid = rd_unit;
-        prev_len = rd_unit;
+        iov.iov_base = data;
+        iov.iov_len = datasz;
 
         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
-
         if (error) {
-                kmem_free(data, args->dircount);
-                goto out;
+                iseof = 0;
+                goto done;
         }
 
-        if (uio.uio_resid == prev_len && !iseof) {
-                if (nents == 0) {
-                        kmem_free(data, args->dircount);
-                        resp->status = NFS3ERR_TOOSMALL;
-                        goto out1;
-                }
+        if (iov.iov_len == datasz)
+                goto done;
 
-                /*
-                 * We could not get any more entries, so get the attributes
-                 * and filehandle for the entries already obtained.
-                 */
-                goto good;
-        }
+        for (dp = (dirent64_t *)data; (char *)dp - data < datasz - iov.iov_len;
+            dp = nextdp(dp)) {
+                char *name;
+                vnode_t *nvp;
+                count3 edirsize;
+                count3 esize;
 
-        /*
-         * We estimate the size of the response by assuming the
-         * entry exists and attributes and filehandle are also valid
-         */
-        for (size = prev_len - uio.uio_resid;
-            size > 0;
-            size -= dp->d_reclen, dp = nextdp(dp)) {
-
                 if (dp->d_ino == 0) {
-                        nents++;
+                        if (entry != NULL)
+                                entry->cookie = (cookie3)dp->d_off;
                         continue;
                 }
 
-                namlen[nents] = strlen(dp->d_name);
-                entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
+                name = nfscmd_convname(ca, exi, dp->d_name,
+                    NFSCMD_CONV_OUTBOUND, MAXPATHLEN + 1);
+                if (name == NULL) {
+                        if (entry != NULL)
+                                entry->cookie = (cookie3)dp->d_off;
+                        continue;
+                }
 
                 /*
-                 * We need to check to see if the number of bytes left
-                 * to go into the buffer will actually fit into the
-                 * buffer.  This is calculated as the size of this
-                 * entry plus:
-                 *      1 for the true/false list indicator +
-                 *      1 for the eof indicator
-                 * times BYTES_PER_XDR_UNIT to convert from XDR units
-                 * to bytes.
-                 *
-                 * Also check the dircount limit against the first entry read
-                 *
+                 * struct entryplus3:
+                 *   fileid:            2
+                 *   name (length):     1
+                 *   name (data):       length (rounded up)
+                 *   cookie:            2
                  */
-                tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
-                if (bufsize + tofit > args->maxcount) {
-                        /*
-                         * We make a check here to see if this was the
-                         * first entry being measured.  If so, then maxcount
-                         * was too small to begin with and so we need to
-                         * return with NFS3ERR_TOOSMALL.
-                         */
-                        if (nents == 0) {
-                                kmem_free(data, args->dircount);
-                                resp->status = NFS3ERR_TOOSMALL;
-                                goto out1;
-                        }
-                        iseof = FALSE;
-                        goto good;
-                }
-                bufsize += entrysize;
-                nents++;
-        }
+                edirsize = (2 + 1 + 2) * BYTES_PER_XDR_UNIT +
+                    RNDUP(strlen(name));
 
         /*
-         * If there is enough room to fit at least 1 more entry including
-         * post op attributes and filehandle in the buffer AND that we haven't
-         * exceeded dircount then go back and get some more.
+                 * struct entryplus3:
+                 *   attributes_follow: 1
+                 *   handle_follows:    1
+                 *   nextentry (bool):  1
          */
-        if (!iseof &&
-            (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
-                space_left -= (prev_len - uio.uio_resid);
-                if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
-                        goto getmoredents;
+                esize = edirsize + (1 + 1 + 1) * BYTES_PER_XDR_UNIT;
 
-                /* else, fall through */
+                /* If the new entry does not fit, we are done */
+                if (edirsize > dircount - dirsize || esize > maxcount - size) {
+                        if (name != dp->d_name)
+                                kmem_free(name, MAXPATHLEN + 1);
+                        iseof = 0;
+                        error = 0;
+                        goto done;
         }
-good:
-        va.va_mask = AT_ALL;
-        vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 
-        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
+                entry = kmem_alloc(sizeof (entryplus3), KM_SLEEP);
 
-        infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
-        resp->resok.infop = infop;
+                entry->fileid = (fileid3)dp->d_ino;
+                entry->name = strdup(name);
+                if (name != dp->d_name)
+                        kmem_free(name, MAXPATHLEN + 1);
+                entry->cookie = (cookie3)dp->d_off;
 
-        dp = (struct dirent64 *)data;
-        for (i = 0; i < nents; i++) {
-
-                if (dp->d_ino == 0) {
-                        infop[i].attr.attributes = FALSE;
-                        infop[i].fh.handle_follows = FALSE;
-                        dp = nextdp(dp);
-                        continue;
-                }
-
-                infop[i].namelen = namlen[i];
-
                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
                     NULL, NULL, NULL);
                 if (error) {
-                        infop[i].attr.attributes = FALSE;
-                        infop[i].fh.handle_follows = FALSE;
-                        dp = nextdp(dp);
-                        continue;
-                }
+                        entry->name_attributes.attributes = FALSE;
+                        entry->name_handle.handle_follows = FALSE;
+                } else {
+                        struct vattr nva;
+                        struct vattr *nvap;
 
                 nva.va_mask = AT_ALL;
-                nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
+                        nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL :
+                            &nva;
 
                 /* Lie about the object type for a referral */
-                if (vn_is_nfs_reparse(nvp, cr))
+                        if (nvap != NULL && vn_is_nfs_reparse(nvp, cr))
                         nvap->va_type = VLNK;
 
-                vattr_to_post_op_attr(nvap, &infop[i].attr);
+                        if (vn_ismntpt(nvp)) {
+                                entry->name_attributes.attributes = FALSE;
+                                entry->name_handle.handle_follows = FALSE;
+                        } else {
+                                vattr_to_post_op_attr(nvap,
+                                    &entry->name_attributes);
 
-                error = makefh3(&infop[i].fh.handle, nvp, exi);
+                                error = makefh3(&entry->name_handle.handle, nvp,
+                                    exi);
                 if (!error)
-                        infop[i].fh.handle_follows = TRUE;
+                                        entry->name_handle.handle_follows =
+                                            TRUE;
                 else
-                        infop[i].fh.handle_follows = FALSE;
+                                        entry->name_handle.handle_follows =
+                                            FALSE;
+                        }
 
                 VN_RELE(nvp);
-                dp = nextdp(dp);
         }
 
-        ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
-        ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
-        if (ndata == NULL)
-                ndata = data;
-
-        if (ret > 0) {
                 /*
-                 * We had to drop one or more entries in order to fit
-                 * during the character conversion.  We need to patch
-                 * up the size and eof info.
+                 * struct entryplus3 (optionally):
+                 *   attributes:        NFS3_SIZEOF_FATTR3
+                 *   handle length:     1
+                 *   handle data:       length (rounded up)
                  */
-                if (iseof)
-                        iseof = FALSE;
+                if (entry->name_attributes.attributes == TRUE)
+                        esize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
+                if (entry->name_handle.handle_follows == TRUE)
+                        esize += 1 * BYTES_PER_XDR_UNIT +
+                            RNDUP(entry->name_handle.handle.fh3_length);
 
-                ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
-                    nents, ret);
+                /* If the new entry does not fit, discard it */
+                if (esize > maxcount - size) {
+                        strfree(entry->name);
+                        kmem_free(entry, sizeof (entryplus3));
+                        iseof = 0;
+                        error = 0;
+                        goto done;
         }
 
+                dirsize += edirsize;
+                size += esize;
 
+                /* Add the entry to the linked list */
+                *eptr = entry;
+                eptr = &entry->nextentry;
+        }
+
+        if (!iseof && dirsize < dircount && size < maxcount) {
+                uio.uio_resid = MIN(datasz, MAXBSIZE);
+                goto getmoredents;
+        }
+
+done:
+        *eptr = NULL;
+
+        va.va_mask = AT_ALL;
+        vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
+
+        if (!iseof && resp->resok.reply.entries == NULL) {
+                if (error)
+                        goto out;
+                resp->status = NFS3ERR_TOOSMALL;
+                goto out1;
+        }
+
+        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
+
 #if 0 /* notyet */
         /*
          * Don't do this.  It causes local disk writes when just
          * reading the file and the overhead is deemed larger
          * than the benefit.

@@ -3650,29 +3722,25 @@
          * Force modified metadata out to stable storage.
          */
         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 #endif
 
-        kmem_free(namlen, args->dircount);
-
         resp->status = NFS3_OK;
-        vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
         resp->resok.cookieverf = 0;
-        resp->resok.reply.entries = (entryplus3 *)ndata;
-        resp->resok.reply.eof = iseof;
-        resp->resok.size = nents;
-        resp->resok.count = args->dircount - ret;
-        resp->resok.maxcount = args->maxcount;
+        resp->resok.reply.eof = iseof ? TRUE : FALSE;
 
-        DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
-        if (ndata != data)
-                kmem_free(data, args->dircount);
+        vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
 
+        DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READDIRPLUS3res *, resp);
 
         VN_RELE(vp);
 
+        if (data != NULL)
+                kmem_free(data, datasz);
+
         return;
 
 out:
         if (curthread->t_flag & T_WOULDBLOCK) {
                 curthread->t_flag &= ~T_WOULDBLOCK;

@@ -3679,40 +3747,44 @@
                 resp->status = NFS3ERR_JUKEBOX;
         } else {
                 resp->status = puterrno3(error);
         }
 out1:
-        DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
+        vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
 
+        DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            READDIRPLUS3res *, resp);
+
         if (vp != NULL) {
                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
                 VN_RELE(vp);
         }
 
-        if (namlen != NULL)
-                kmem_free(namlen, args->dircount);
-
-        vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
+        if (data != NULL)
+                kmem_free(data, datasz);
 }
 
 void *
 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
 {
-
         return (&args->dir);
 }
 
 void
 rfs3_readdirplus_free(READDIRPLUS3res *resp)
 {
-
         if (resp->status == NFS3_OK) {
-                kmem_free(resp->resok.reply.entries, resp->resok.count);
-                kmem_free(resp->resok.infop,
-                    resp->resok.size * sizeof (struct entryplus3_info));
+                entryplus3 *entry, *nentry;
+
+                for (entry = resp->resok.reply.entries; entry != NULL;
+                    entry = nentry) {
+                        nentry = entry->nextentry;
+                        strfree(entry->name);
+                        kmem_free(entry, sizeof (entryplus3));
         }
+        }
 }
 
 /* ARGSUSED */
 void
 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,

@@ -3726,12 +3798,13 @@
 
         vap = NULL;
 
         vp = nfs3_fhtovp(&args->fsroot, exi);
 
-        DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
+        DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            FSSTAT3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -3777,12 +3850,13 @@
         resp->resok.tfiles = (size3)sb.f_files;
         resp->resok.ffiles = (size3)sb.f_ffree;
         resp->resok.afiles = (size3)sb.f_favail;
         resp->resok.invarsec = 0;
 
-        DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
+        DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            FSSTAT3res *, resp);
         VN_RELE(vp);
 
         return;
 
 out:

@@ -3790,22 +3864,22 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
+        DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            FSSTAT3res *, resp);
 
         if (vp != NULL)
                 VN_RELE(vp);
         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 }
 
 void *
 rfs3_fsstat_getfh(FSSTAT3args *args)
 {
-
         return (&args->fsroot);
 }
 
 /* ARGSUSED */
 void

@@ -3819,12 +3893,13 @@
         ulong_t l = 0;
         int error;
 
         vp = nfs3_fhtovp(&args->fsroot, exi);
 
-        DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
+        DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            FSINFO3args *, args);
 
         if (vp == NULL) {
                 if (curthread->t_flag & T_WOULDBLOCK) {
                         curthread->t_flag &= ~T_WOULDBLOCK;
                         resp->status = NFS3ERR_JUKEBOX;

@@ -3894,20 +3969,22 @@
         resp->resok.time_delta.seconds = 0;
         resp->resok.time_delta.nseconds = 1000;
         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
 
-        DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
+        DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            FSINFO3res *, resp);
 
         VN_RELE(vp);
 
         return;
 
 out:
-        DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
+        DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
+            FSINFO3res *, resp);
         if (vp != NULL)
                 VN_RELE(vp);
 }
 
 void *

@@ -3929,12 +4006,13 @@
 
         vap = NULL;
 
         vp = nfs3_fhtovp(&args->object, exi);
 
-        DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
+        DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            PATHCONF3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto out;
         }

@@ -3986,12 +4064,13 @@
 
         resp->status = NFS3_OK;
         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
         resp->resok.info.case_insensitive = FALSE;
         resp->resok.info.case_preserving = TRUE;
-        DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
+        DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            PATHCONF3res *, resp);
         VN_RELE(vp);
         return;
 
 out:
         if (curthread->t_flag & T_WOULDBLOCK) {

@@ -3998,28 +4077,29 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
+        DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            PATHCONF3res *, resp);
         if (vp != NULL)
                 VN_RELE(vp);
         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 }
 
 void *
 rfs3_pathconf_getfh(PATHCONF3args *args)
 {
-
         return (&args->object);
 }
 
 void
 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
     struct svc_req *req, cred_t *cr, bool_t ro)
 {
+        nfs3_srv_t *ns;
         int error;
         vnode_t *vp;
         struct vattr *bvap;
         struct vattr bva;
         struct vattr *avap;

@@ -4028,18 +4108,20 @@
         bvap = NULL;
         avap = NULL;
 
         vp = nfs3_fhtovp(&args->file, exi);
 
-        DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
+        DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            COMMIT3args *, args);
 
         if (vp == NULL) {
                 error = ESTALE;
                 goto out;
         }
 
+        ns = zone_getspecific(rfs3_zone_key, curzone);
         bva.va_mask = AT_ALL;
         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
 
         /*
          * If we can't get the attributes, then we can't do the

@@ -4088,14 +4170,15 @@
         if (error)
                 goto out;
 
         resp->status = NFS3_OK;
         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
-        resp->resok.verf = write3verf;
+        resp->resok.verf = ns->write3verf;
 
-        DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
+        DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            COMMIT3res *, resp);
 
         VN_RELE(vp);
 
         return;
 

@@ -4104,22 +4187,22 @@
                 curthread->t_flag &= ~T_WOULDBLOCK;
                 resp->status = NFS3ERR_JUKEBOX;
         } else
                 resp->status = puterrno3(error);
 out1:
-        DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
-            cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
+        DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
+            cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+            COMMIT3res *, resp);
 
         if (vp != NULL)
                 VN_RELE(vp);
         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
 }
 
 void *
 rfs3_commit_getfh(COMMIT3args *args)
 {
-
         return (&args->file);
 }
 
 static int
 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)

@@ -4183,11 +4266,11 @@
         }
 
         return (0);
 }
 
-static ftype3 vt_to_nf3[] = {
+static const ftype3 vt_to_nf3[] = {
         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
 };
 
 static int
 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)

@@ -4265,24 +4348,44 @@
 }
 
 static void
 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
 {
-
         vattr_to_pre_op_attr(bvap, &wccp->before);
         vattr_to_post_op_attr(avap, &wccp->after);
 }
 
-void
-rfs3_srvrinit(void)
+static int
+rdma_setup_read_data3(READ3args *args, READ3resok *rok)
 {
+        struct clist    *wcl;
+        int             wlist_len;
+        count3          count = rok->count;
+
+        wcl = args->wlist;
+        if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
+                return (FALSE);
+
+        wcl = args->wlist;
+        rok->wlist_len = wlist_len;
+        rok->wlist = wcl;
+        return (TRUE);
+}
+
+/* ARGSUSED */
+static void *
+rfs3_zone_init(zoneid_t zoneid)
+{
+        nfs3_srv_t *ns;
         struct rfs3_verf_overlay {
                 uint_t id; /* a "unique" identifier */
                 int ts; /* a unique timestamp */
         } *verfp;
         timestruc_t now;
 
+        ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
+
         /*
          * The following algorithm attempts to find a unique verifier
          * to be used as the write verifier returned from the server
          * to the client.  It is important that this verifier change
          * whenever the server reboots.  Of secondary importance, it

@@ -4302,41 +4405,38 @@
 #ifndef lint
         /*
          * We ASSERT that this constant logic expression is
          * always true because in the past, it wasn't.
          */
-        ASSERT(sizeof (*verfp) <= sizeof (write3verf));
+        ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
 #endif
 
         gethrestime(&now);
-        verfp = (struct rfs3_verf_overlay *)&write3verf;
+        verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
         verfp->ts = (int)now.tv_sec;
         verfp->id = zone_get_hostid(NULL);
 
         if (verfp->id == 0)
                 verfp->id = (uint_t)now.tv_nsec;
 
-        nfs3_srv_caller_id = fs_new_caller_id();
-
+        return (ns);
 }
 
-static int
-rdma_setup_read_data3(READ3args *args, READ3resok *rok)
+/* ARGSUSED */
+static void
+rfs3_zone_fini(zoneid_t zoneid, void *data)
 {
-        struct clist    *wcl;
-        int             wlist_len;
-        count3          count = rok->count;
+        nfs3_srv_t *ns = data;
 
-        wcl = args->wlist;
-        if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
-                return (FALSE);
-        }
+        kmem_free(ns, sizeof (*ns));
+}
 
-        wcl = args->wlist;
-        rok->wlist_len = wlist_len;
-        rok->wlist = wcl;
-        return (TRUE);
+void
+rfs3_srvrinit(void)
+{
+        nfs3_srv_caller_id = fs_new_caller_id();
+        zone_key_create(&rfs3_zone_key, rfs3_zone_init, NULL, rfs3_zone_fini);
 }
 
 void
 rfs3_srvrfini(void)
 {