Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16

@@ -18,12 +18,15 @@
  *
  * CDDL HEADER END
  */
 
 /*
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
  * Copyright (c) 2015, Joyent, Inc.
  */
 
 #include <sys/systm.h>
 

@@ -137,33 +140,34 @@
  * directories is added to filter lookup and readdir results
  * to only contain dirnames which lead to descendant shares.
  *
  * A visible list has a per-file-system scope.  Any exportinfo
  * struct (real or pseudo) can have a visible list as long as
- * a) its export root is VROOT
+ * a) its export root is VROOT, or is the zone's root for in-zone NFS service
  * b) a descendant of the export root is shared
  */
 struct exportinfo *
-pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head,
-    struct exportdata *exdata)
+pseudo_exportfs(nfs_export_t *ne, vnode_t *vp, fid_t *fid,
+    struct exp_visible *vis_head, struct exportdata *exdata)
 {
         struct exportinfo *exi;
         struct exportdata *kex;
         fsid_t fsid;
         int vpathlen;
         int i;
 
-        ASSERT(RW_WRITE_HELD(&exported_lock));
+        ASSERT(RW_WRITE_HELD(&ne->exported_lock));
 
         fsid = vp->v_vfsp->vfs_fsid;
         exi = kmem_zalloc(sizeof (*exi), KM_SLEEP);
         exi->exi_fsid = fsid;
         exi->exi_fid = *fid;
         exi->exi_vp = vp;
         VN_HOLD(exi->exi_vp);
         exi->exi_visible = vis_head;
         exi->exi_count = 1;
+        exi->exi_zoneid = ne->ne_globals->nfs_zoneid;
         exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag &
             VSW_VOLATILEDEV) ? 1 : 0;
         mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL);
 
         /*

@@ -203,12 +207,20 @@
         rw_init(&exi->exi_cache_lock, NULL, RW_DEFAULT, NULL);
 
         /*
          * Insert the new entry at the front of the export list
          */
-        export_link(exi);
+        export_link(ne, exi);
 
+        /*
+         * Assign exi_id and add the export to exi_id_tree
+         */
+        mutex_enter(&nfs_exi_id_lock);
+        exi->exi_id = exi_id_get_next();
+        avl_add(&exi_id_tree, exi);
+        mutex_exit(&nfs_exi_id_lock);
+
         return (exi);
 }
 
 /*
  * Free a list of visible directories

@@ -279,18 +291,18 @@
  * Removes node from the tree and frees the treenode struct.
  * Does not free structures pointed by tree_exi and tree_vis,
  * they should be already freed.
  */
 static void
-tree_remove_node(treenode_t *node)
+tree_remove_node(nfs_export_t *ne, treenode_t *node)
 {
         treenode_t *parent = node->tree_parent;
         treenode_t *s; /* s for sibling */
 
         if (parent == NULL) {
                 kmem_free(node, sizeof (*node));
-                ns_root = NULL;
+                ne->ns_root = NULL;
                 return;
         }
         /* This node is first child */
         if (parent->tree_child_first == node) {
                 parent->tree_child_first = node->tree_sibling;

@@ -435,10 +447,11 @@
 more_visible(struct exportinfo *exi, treenode_t *tree_head)
 {
         struct exp_visible *vp1, *vp2, *vis_head, *tail, *next;
         int found;
         treenode_t *child, *curr, *connect_point;
+        nfs_export_t *ne = nfs_get_export();
 
         vis_head = tree_head->tree_vis;
         connect_point = exi->exi_tree;
 
         /*

@@ -448,11 +461,11 @@
         if (exi->exi_visible == NULL) {
                 tree_add_child(connect_point, tree_head);
                 exi->exi_visible = vis_head;
 
                 /* Update the change timestamp */
-                tree_update_change(connect_point, &vis_head->vis_change);
+                tree_update_change(ne, connect_point, &vis_head->vis_change);
 
                 return;
         }
 
         /* The outer loop traverses the supplied list. */

@@ -508,11 +521,11 @@
                         connect_point = child;
                 } else { /* Branching */
                         tree_add_child(connect_point, curr);
 
                         /* Update the change timestamp */
-                        tree_update_change(connect_point,
+                        tree_update_change(ne, connect_point,
                             &curr->tree_vis->vis_change);
 
                         connect_point = NULL;
                 }
         }

@@ -625,12 +638,15 @@
         struct exp_visible *visp;
         struct exp_visible *vis_head = NULL;
         struct vattr va;
         treenode_t *tree_head = NULL;
         timespec_t now;
+        nfs_export_t *ne;
 
-        ASSERT(RW_WRITE_HELD(&exported_lock));
+        ne = exip->exi_ne;
+        ASSERT3P(ne, ==, nfs_get_export());     /* curzone reality check */
+        ASSERT(RW_WRITE_HELD(&ne->exported_lock));
 
         gethrestime(&now);
 
         vp = exip->exi_vp;
         VN_HOLD(vp);

@@ -642,15 +658,18 @@
                 fid.fid_len = MAXFIDSZ;
                 error = vop_fid_pseudo(vp, &fid);
                 if (error)
                         break;
 
+                /* XXX KEBE ASKS DO WE NEED THIS?!? */
+                ASSERT3U(exip->exi_zoneid, ==, curzone->zone_id);
                 /*
-                 * The root of the file system needs special handling
+                 * The root of the file system, or the zone's root for
+                 * in-zone NFS service needs special handling
                  */
-                if (vp->v_flag & VROOT) {
-                        if (! exportdir) {
+                if (vp->v_flag & VROOT || vp == EXI_TO_ZONEROOTVP(exip)) {
+                        if (!exportdir) {
                                 struct exportinfo *exi;
 
                                 /*
                                  * Check if this VROOT dir is already exported.
                                  * If so, then attach the pseudonodes.  If not,

@@ -675,36 +694,36 @@
                                  * Found the root directory of a filesystem
                                  * that isn't exported.  Need to export
                                  * this as a pseudo export so that an NFS v4
                                  * client can do lookups in it.
                                  */
-                                new_exi = pseudo_exportfs(vp, &fid, vis_head,
-                                    NULL);
+                                new_exi = pseudo_exportfs(ne, vp, &fid,
+                                    vis_head, NULL);
                                 vis_head = NULL;
                         }
 
-                        if (VN_CMP(vp, rootdir)) {
+                        if (VN_IS_CURZONEROOT(vp)) {
                                 /* at system root */
                                 /*
                                  * If sharing "/", new_exi is shared exportinfo
                                  * (exip). Otherwise, new_exi is exportinfo
                                  * created by pseudo_exportfs() above.
                                  */
-                                ns_root = tree_prepend_node(tree_head, NULL,
+                                ne->ns_root = tree_prepend_node(tree_head, NULL,
                                     new_exi);
 
                                 /* Update the change timestamp */
-                                tree_update_change(ns_root, &now);
+                                tree_update_change(ne, ne->ns_root, &now);
 
                                 break;
                         }
 
                         /*
                          * Traverse across the mountpoint and continue the
                          * climb on the mounted-on filesystem.
                          */
-                        vp = untraverse(vp);
+                        vp = untraverse(vp, ne->exi_root->exi_vp);
                         exportdir = 0;
                         continue;
                 }
 
                 /*

@@ -786,11 +805,14 @@
                 while (tree_head) {
                         treenode_t *t2 = tree_head;
                         exportinfo_t *e  = tree_head->tree_exi;
                         /* exip will be freed in exportfs() */
                         if (e && e != exip) {
-                                export_unlink(e);
+                                mutex_enter(&nfs_exi_id_lock);
+                                avl_remove(&exi_id_tree, e);
+                                mutex_exit(&nfs_exi_id_lock);
+                                export_unlink(ne, e);
                                 exi_rele(e);
                         }
                         tree_head = tree_head->tree_child_first;
                         kmem_free(t2, sizeof (*t2));
                 }

@@ -807,21 +829,34 @@
  *
  * Deleting of nodes will start only if the unshared
  * node was a leaf node.
  * Deleting of nodes will finish when we reach a node which
  * has children or is a real export, then we might still need
- * to continue releasing visibles, until we reach VROOT node.
+ * to continue releasing visibles, until we reach VROOT or zone's root node.
  */
 void
-treeclimb_unexport(struct exportinfo *exip)
+treeclimb_unexport(nfs_export_t *ne, struct exportinfo *exip)
 {
         treenode_t *tnode, *old_nd;
         treenode_t *connect_point = NULL;
 
-        ASSERT(RW_WRITE_HELD(&exported_lock));
+        ASSERT(RW_WRITE_HELD(&ne->exported_lock));
+        ASSERT(curzone->zone_id == exip->exi_zoneid ||
+            curzone->zone_id == global_zone->zone_id);
 
+        /*
+         * exi_tree can be null for the zone root
+         * which means we're already at the "top"
+         * and there's nothing more to "climb".
+         */
         tnode = exip->exi_tree;
+        if (tnode == NULL) {
+                /* Should only happen for the zone root export */
+                ASSERT(exip == ne->exi_root);
+                return;
+        }
+
         /*
          * The unshared exportinfo was unlinked in unexport().
          * Zeroing tree_exi ensures that we will skip it.
          */
         tnode->tree_exi = NULL;

@@ -829,20 +864,27 @@
         if (tnode->tree_vis != NULL) /* system root has tree_vis == NULL */
                 tnode->tree_vis->vis_exported = 0;
 
         while (tnode != NULL) {
 
-                /* Stop at VROOT node which is exported or has child */
+                /*
+                 * Stop at VROOT (or zone root) node which is exported or has
+                 * child.
+                 */
                 if (TREE_ROOT(tnode) &&
                     (TREE_EXPORTED(tnode) || tnode->tree_child_first != NULL))
                         break;
 
                 /* Release pseudo export if it has no child */
                 if (TREE_ROOT(tnode) && !TREE_EXPORTED(tnode) &&
                     tnode->tree_child_first == NULL) {
-                        export_unlink(tnode->tree_exi);
+                        mutex_enter(&nfs_exi_id_lock);
+                        avl_remove(&exi_id_tree, tnode->tree_exi);
+                        mutex_exit(&nfs_exi_id_lock);
+                        export_unlink(ne, tnode->tree_exi);
                         exi_rele(tnode->tree_exi);
+                        tnode->tree_exi = NULL;
                 }
 
                 /* Release visible in parent's exportinfo */
                 if (tnode->tree_vis != NULL)
                         less_visible(vis2exi(tnode), tnode->tree_vis);

@@ -852,33 +894,33 @@
                 tnode = tnode->tree_parent;
 
                 /* Remove itself, if this is a leaf and non-exported node */
                 if (old_nd->tree_child_first == NULL &&
                     !TREE_EXPORTED(old_nd)) {
-                        tree_remove_node(old_nd);
+                        tree_remove_node(ne, old_nd);
                         connect_point = tnode;
                 }
         }
 
         /* Update the change timestamp */
         if (connect_point != NULL)
-                tree_update_change(connect_point, NULL);
+                tree_update_change(ne, connect_point, NULL);
 }
 
 /*
  * Traverse backward across mountpoint from the
  * root vnode of a filesystem to its mounted-on
  * vnode.
  */
 vnode_t *
-untraverse(vnode_t *vp)
+untraverse(vnode_t *vp, vnode_t *zone_rootvp)
 {
         vnode_t *tvp, *nextvp;
 
         tvp = vp;
         for (;;) {
-                if (! (tvp->v_flag & VROOT))
+                if (!(tvp->v_flag & VROOT) && !VN_CMP(tvp, zone_rootvp))
                         break;
 
                 /* lock vfs to prevent unmount of this vfs */
                 vfs_lock_wait(tvp->v_vfsp);
 

@@ -905,11 +947,11 @@
         return (tvp);
 }
 
 /*
  * Given an exportinfo, climb up to find the exportinfo for the VROOT
- * of the filesystem.
+ * (or zone root) of the filesystem.
  *
  * e.g.         /
  *              |
  *              a (VROOT) pseudo-exportinfo
  *              |

@@ -922,11 +964,11 @@
  * where c is in the same filesystem as a.
  * So, get_root_export(*exportinfo_for_c) returns exportinfo_for_a
  *
  * If d is shared, then c will be put into a's visible list.
  * Note: visible list is per filesystem and is attached to the
- * VROOT exportinfo.
+ * VROOT exportinfo.  Returned exi does NOT have a new hold.
  */
 struct exportinfo *
 get_root_export(struct exportinfo *exip)
 {
         treenode_t *tnode = exip->exi_tree;

@@ -954,16 +996,19 @@
         bool_t vp_is_exported;
 
         vp_is_exported = VN_CMP(vp, exi->exi_vp);
 
         /*
-         * An exported root vnode has a sub-dir shared if it has a visible list.
-         * i.e. if it does not have a visible list, then there is no node in
-         * this filesystem leads to any other shared node.
+         * An exported root vnode has a sub-dir shared if it has a visible
+         * list.  i.e. if it does not have a visible list, then no node
+         * in this filesystem leads to any other shared node.
          */
-        if (vp_is_exported && (vp->v_flag & VROOT))
+        ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
+        if (vp_is_exported &&
+            ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp))) {
                 return (exi->exi_visible ? 1 : 0);
+        }
 
         /*
          * Only the exportinfo of a fs root node may have a visible list.
          * Either it is a pseudo root node, or a real exported root node.
          */

@@ -1032,11 +1077,11 @@
 
         /*
          * Only a PSEUDO node has a visible list or an exported VROOT
          * node may have a visible list.
          */
-        if (! PSEUDO(exi))
+        if (!PSEUDO(exi))
                 exi = get_root_export(exi);
 
         /* Get the fid of the vnode */
 
         bzero(&fid, sizeof (fid));

@@ -1140,11 +1185,11 @@
 {
         /*
          * Only a PSEUDO node has a visible list or an exported VROOT
          * node may have a visible list.
          */
-        if (! PSEUDO(exi))
+        if (!PSEUDO(exi))
                 exi = get_root_export(exi);
 
         for (*visp = exi->exi_visible; *visp != NULL; *visp = (*visp)->vis_next)
                 if ((u_longlong_t)ino == (*visp)->vis_ino) {
                         return (1);

@@ -1152,27 +1197,20 @@
 
         return (0);
 }
 
 /*
- * The change attribute value of the root of nfs pseudo namespace.
- *
- * The ns_root_change is protected by exported_lock because all of the treenode
- * operations are protected by exported_lock too.
- */
-static timespec_t ns_root_change;
-
-/*
  * Get the change attribute from visible and returns TRUE.
  * If the change value is not available returns FALSE.
  */
 bool_t
 nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change)
 {
         struct exp_visible *visp;
         fid_t fid;
         treenode_t *node;
+        nfs_export_t *ne = nfs_get_export();
 
         /*
          * First check to see if vp is export root.
          */
         if (VN_CMP(vp, exi->exi_vp))

@@ -1213,18 +1251,17 @@
         return (FALSE);
 
 exproot:
         /* The VROOT export have its visible available through treenode */
         node = exi->exi_tree;
-        if (node != ns_root) {
+        if (node != ne->ns_root) {
                 ASSERT(node->tree_vis != NULL);
                 *change = node->tree_vis->vis_change;
         } else {
                 ASSERT(node->tree_vis == NULL);
-                *change = ns_root_change;
+                *change = ne->ns_root_change;
         }
-
         return (TRUE);
 }
 
 /*
  * Update the change attribute value for a particular treenode.  The change

@@ -1232,19 +1269,19 @@
  * ns_root_change.
  *
  * If the change value is not supplied, the current time is used.
  */
 void
-tree_update_change(treenode_t *tnode, timespec_t *change)
+tree_update_change(nfs_export_t *ne, treenode_t *tnode, timespec_t *change)
 {
         timespec_t *vis_change;
 
         ASSERT(tnode != NULL);
-        ASSERT((tnode != ns_root && tnode->tree_vis != NULL) ||
-            (tnode == ns_root && tnode->tree_vis == NULL));
+        ASSERT((tnode != ne->ns_root && tnode->tree_vis != NULL) ||
+            (tnode == ne->ns_root && tnode->tree_vis == NULL));
 
-        vis_change = tnode == ns_root ? &ns_root_change
+        vis_change = tnode == ne->ns_root ? &ne->ns_root_change
             : &tnode->tree_vis->vis_change;
 
         if (change != NULL)
                 *vis_change = *change;
         else