Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
*** 18,29 ****
*
* CDDL HEADER END
*/
/*
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, Joyent, Inc.
*/
#include <sys/systm.h>
--- 18,32 ----
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+ /*
+ * Copyright 2018 Nexenta Systems, Inc.
* Copyright (c) 2015, Joyent, Inc.
*/
#include <sys/systm.h>
*** 137,169 ****
* directories is added to filter lookup and readdir results
* to only contain dirnames which lead to descendant shares.
*
* A visible list has a per-file-system scope. Any exportinfo
* struct (real or pseudo) can have a visible list as long as
! * a) its export root is VROOT
* b) a descendant of the export root is shared
*/
struct exportinfo *
! pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head,
! struct exportdata *exdata)
{
struct exportinfo *exi;
struct exportdata *kex;
fsid_t fsid;
int vpathlen;
int i;
! ASSERT(RW_WRITE_HELD(&exported_lock));
fsid = vp->v_vfsp->vfs_fsid;
exi = kmem_zalloc(sizeof (*exi), KM_SLEEP);
exi->exi_fsid = fsid;
exi->exi_fid = *fid;
exi->exi_vp = vp;
VN_HOLD(exi->exi_vp);
exi->exi_visible = vis_head;
exi->exi_count = 1;
exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag &
VSW_VOLATILEDEV) ? 1 : 0;
mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL);
/*
--- 140,173 ----
* directories is added to filter lookup and readdir results
* to only contain dirnames which lead to descendant shares.
*
* A visible list has a per-file-system scope. Any exportinfo
* struct (real or pseudo) can have a visible list as long as
! * a) its export root is VROOT, or is the zone's root for in-zone NFS service
* b) a descendant of the export root is shared
*/
struct exportinfo *
! pseudo_exportfs(nfs_export_t *ne, vnode_t *vp, fid_t *fid,
! struct exp_visible *vis_head, struct exportdata *exdata)
{
struct exportinfo *exi;
struct exportdata *kex;
fsid_t fsid;
int vpathlen;
int i;
! ASSERT(RW_WRITE_HELD(&ne->exported_lock));
fsid = vp->v_vfsp->vfs_fsid;
exi = kmem_zalloc(sizeof (*exi), KM_SLEEP);
exi->exi_fsid = fsid;
exi->exi_fid = *fid;
exi->exi_vp = vp;
VN_HOLD(exi->exi_vp);
exi->exi_visible = vis_head;
exi->exi_count = 1;
+ exi->exi_zoneid = ne->ne_globals->nfs_zoneid;
exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag &
VSW_VOLATILEDEV) ? 1 : 0;
mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL);
/*
*** 203,214 ****
rw_init(&exi->exi_cache_lock, NULL, RW_DEFAULT, NULL);
/*
* Insert the new entry at the front of the export list
*/
! export_link(exi);
return (exi);
}
/*
* Free a list of visible directories
--- 207,226 ----
rw_init(&exi->exi_cache_lock, NULL, RW_DEFAULT, NULL);
/*
* Insert the new entry at the front of the export list
*/
! export_link(ne, exi);
+ /*
+ * Assign the next exi_id and insert the new export into exi_id_tree,
+ * both under nfs_exi_id_lock.
+ */
+ mutex_enter(&nfs_exi_id_lock);
+ exi->exi_id = exi_id_get_next();
+ avl_add(&exi_id_tree, exi);
+ mutex_exit(&nfs_exi_id_lock);
+
return (exi);
}
/*
* Free a list of visible directories
*** 279,296 ****
* Removes node from the tree and frees the treenode struct.
* Does not free structures pointed by tree_exi and tree_vis,
* they should be already freed.
*/
static void
! tree_remove_node(treenode_t *node)
{
treenode_t *parent = node->tree_parent;
treenode_t *s; /* s for sibling */
if (parent == NULL) {
kmem_free(node, sizeof (*node));
! ns_root = NULL;
return;
}
/* This node is first child */
if (parent->tree_child_first == node) {
parent->tree_child_first = node->tree_sibling;
--- 291,308 ----
* Removes node from the tree and frees the treenode struct.
* Does not free structures pointed by tree_exi and tree_vis,
* they should be already freed.
*/
static void
! tree_remove_node(nfs_export_t *ne, treenode_t *node)
{
treenode_t *parent = node->tree_parent;
treenode_t *s; /* s for sibling */
if (parent == NULL) {
kmem_free(node, sizeof (*node));
! ne->ns_root = NULL;
return;
}
/* This node is first child */
if (parent->tree_child_first == node) {
parent->tree_child_first = node->tree_sibling;
*** 435,444 ****
--- 447,457 ----
more_visible(struct exportinfo *exi, treenode_t *tree_head)
{
struct exp_visible *vp1, *vp2, *vis_head, *tail, *next;
int found;
treenode_t *child, *curr, *connect_point;
+ nfs_export_t *ne = nfs_get_export();
vis_head = tree_head->tree_vis;
connect_point = exi->exi_tree;
/*
*** 448,458 ****
if (exi->exi_visible == NULL) {
tree_add_child(connect_point, tree_head);
exi->exi_visible = vis_head;
/* Update the change timestamp */
! tree_update_change(connect_point, &vis_head->vis_change);
return;
}
/* The outer loop traverses the supplied list. */
--- 461,471 ----
if (exi->exi_visible == NULL) {
tree_add_child(connect_point, tree_head);
exi->exi_visible = vis_head;
/* Update the change timestamp */
! tree_update_change(ne, connect_point, &vis_head->vis_change);
return;
}
/* The outer loop traverses the supplied list. */
*** 508,518 ****
connect_point = child;
} else { /* Branching */
tree_add_child(connect_point, curr);
/* Update the change timestamp */
! tree_update_change(connect_point,
&curr->tree_vis->vis_change);
connect_point = NULL;
}
}
--- 521,531 ----
connect_point = child;
} else { /* Branching */
tree_add_child(connect_point, curr);
/* Update the change timestamp */
! tree_update_change(ne, connect_point,
&curr->tree_vis->vis_change);
connect_point = NULL;
}
}
*** 625,636 ****
struct exp_visible *visp;
struct exp_visible *vis_head = NULL;
struct vattr va;
treenode_t *tree_head = NULL;
timespec_t now;
! ASSERT(RW_WRITE_HELD(&exported_lock));
gethrestime(&now);
vp = exip->exi_vp;
VN_HOLD(vp);
--- 638,652 ----
struct exp_visible *visp;
struct exp_visible *vis_head = NULL;
struct vattr va;
treenode_t *tree_head = NULL;
timespec_t now;
+ nfs_export_t *ne;
! ne = exip->exi_ne;
! ASSERT3P(ne, ==, nfs_get_export()); /* curzone reality check */
! ASSERT(RW_WRITE_HELD(&ne->exported_lock));
gethrestime(&now);
vp = exip->exi_vp;
VN_HOLD(vp);
*** 642,656 ****
fid.fid_len = MAXFIDSZ;
error = vop_fid_pseudo(vp, &fid);
if (error)
break;
/*
! * The root of the file system needs special handling
*/
! if (vp->v_flag & VROOT) {
! if (! exportdir) {
struct exportinfo *exi;
/*
* Check if this VROOT dir is already exported.
* If so, then attach the pseudonodes. If not,
--- 658,675 ----
fid.fid_len = MAXFIDSZ;
error = vop_fid_pseudo(vp, &fid);
if (error)
break;
+ /* XXX KEBE ASKS DO WE NEED THIS?!? */
+ ASSERT3U(exip->exi_zoneid, ==, curzone->zone_id);
/*
! * The root of the file system, or the zone's root for
! * in-zone NFS service, needs special handling
*/
! if (vp->v_flag & VROOT || vp == EXI_TO_ZONEROOTVP(exip)) {
! if (!exportdir) {
struct exportinfo *exi;
/*
* Check if this VROOT dir is already exported.
* If so, then attach the pseudonodes. If not,
*** 675,710 ****
* Found the root directory of a filesystem
* that isn't exported. Need to export
* this as a pseudo export so that an NFS v4
* client can do lookups in it.
*/
! new_exi = pseudo_exportfs(vp, &fid, vis_head,
! NULL);
vis_head = NULL;
}
! if (VN_CMP(vp, rootdir)) {
/* at system root */
/*
* If sharing "/", new_exi is shared exportinfo
* (exip). Otherwise, new_exi is exportinfo
* created by pseudo_exportfs() above.
*/
! ns_root = tree_prepend_node(tree_head, NULL,
new_exi);
/* Update the change timestamp */
! tree_update_change(ns_root, &now);
break;
}
/*
* Traverse across the mountpoint and continue the
* climb on the mounted-on filesystem.
*/
! vp = untraverse(vp);
exportdir = 0;
continue;
}
/*
--- 694,729 ----
* Found the root directory of a filesystem
* that isn't exported. Need to export
* this as a pseudo export so that an NFS v4
* client can do lookups in it.
*/
! new_exi = pseudo_exportfs(ne, vp, &fid,
! vis_head, NULL);
vis_head = NULL;
}
! if (VN_IS_CURZONEROOT(vp)) {
/* at system root */
/*
* If sharing "/", new_exi is shared exportinfo
* (exip). Otherwise, new_exi is exportinfo
* created by pseudo_exportfs() above.
*/
! ne->ns_root = tree_prepend_node(tree_head, NULL,
new_exi);
/* Update the change timestamp */
! tree_update_change(ne, ne->ns_root, &now);
break;
}
/*
* Traverse across the mountpoint and continue the
* climb on the mounted-on filesystem.
*/
! vp = untraverse(vp, ne->exi_root->exi_vp);
exportdir = 0;
continue;
}
/*
*** 786,796 ****
while (tree_head) {
treenode_t *t2 = tree_head;
exportinfo_t *e = tree_head->tree_exi;
/* exip will be freed in exportfs() */
if (e && e != exip) {
! export_unlink(e);
exi_rele(e);
}
tree_head = tree_head->tree_child_first;
kmem_free(t2, sizeof (*t2));
}
--- 805,818 ----
while (tree_head) {
treenode_t *t2 = tree_head;
exportinfo_t *e = tree_head->tree_exi;
/* exip will be freed in exportfs() */
if (e && e != exip) {
! mutex_enter(&nfs_exi_id_lock);
! avl_remove(&exi_id_tree, e);
! mutex_exit(&nfs_exi_id_lock);
! export_unlink(ne, e);
exi_rele(e);
}
tree_head = tree_head->tree_child_first;
kmem_free(t2, sizeof (*t2));
}
*** 807,827 ****
*
* Deleting of nodes will start only if the unshared
* node was a leaf node.
* Deleting of nodes will finish when we reach a node which
* has children or is a real export, then we might still need
! * to continue releasing visibles, until we reach VROOT node.
*/
void
! treeclimb_unexport(struct exportinfo *exip)
{
treenode_t *tnode, *old_nd;
treenode_t *connect_point = NULL;
! ASSERT(RW_WRITE_HELD(&exported_lock));
tnode = exip->exi_tree;
/*
* The unshared exportinfo was unlinked in unexport().
* Zeroing tree_exi ensures that we will skip it.
*/
tnode->tree_exi = NULL;
--- 829,862 ----
*
* Deleting of nodes will start only if the unshared
* node was a leaf node.
* Deleting of nodes will finish when we reach a node which
* has children or is a real export, then we might still need
! * to continue releasing visibles, until we reach VROOT or zone's root node.
*/
void
! treeclimb_unexport(nfs_export_t *ne, struct exportinfo *exip)
{
treenode_t *tnode, *old_nd;
treenode_t *connect_point = NULL;
! ASSERT(RW_WRITE_HELD(&ne->exported_lock));
! ASSERT(curzone->zone_id == exip->exi_zoneid ||
! curzone->zone_id == global_zone->zone_id);
+ /*
+ * exi_tree can be null for the zone root
+ * which means we're already at the "top"
+ * and there's nothing more to "climb".
+ */
tnode = exip->exi_tree;
+ if (tnode == NULL) {
+ /* Should only happen for the zone's root export (exi_root). */
+ ASSERT(exip == ne->exi_root);
+ return;
+ }
+
/*
* The unshared exportinfo was unlinked in unexport().
* Zeroing tree_exi ensures that we will skip it.
*/
tnode->tree_exi = NULL;
*** 829,848 ****
if (tnode->tree_vis != NULL) /* system root has tree_vis == NULL */
tnode->tree_vis->vis_exported = 0;
while (tnode != NULL) {
! /* Stop at VROOT node which is exported or has child */
if (TREE_ROOT(tnode) &&
(TREE_EXPORTED(tnode) || tnode->tree_child_first != NULL))
break;
/* Release pseudo export if it has no child */
if (TREE_ROOT(tnode) && !TREE_EXPORTED(tnode) &&
tnode->tree_child_first == NULL) {
! export_unlink(tnode->tree_exi);
exi_rele(tnode->tree_exi);
}
/* Release visible in parent's exportinfo */
if (tnode->tree_vis != NULL)
less_visible(vis2exi(tnode), tnode->tree_vis);
--- 864,890 ----
if (tnode->tree_vis != NULL) /* system root has tree_vis == NULL */
tnode->tree_vis->vis_exported = 0;
while (tnode != NULL) {
! /*
! * Stop at VROOT (or zone root) node which is exported or has
! * child.
! */
if (TREE_ROOT(tnode) &&
(TREE_EXPORTED(tnode) || tnode->tree_child_first != NULL))
break;
/* Release pseudo export if it has no child */
if (TREE_ROOT(tnode) && !TREE_EXPORTED(tnode) &&
tnode->tree_child_first == NULL) {
! mutex_enter(&nfs_exi_id_lock);
! avl_remove(&exi_id_tree, tnode->tree_exi);
! mutex_exit(&nfs_exi_id_lock);
! export_unlink(ne, tnode->tree_exi);
exi_rele(tnode->tree_exi);
+ tnode->tree_exi = NULL;
}
/* Release visible in parent's exportinfo */
if (tnode->tree_vis != NULL)
less_visible(vis2exi(tnode), tnode->tree_vis);
*** 852,884 ****
tnode = tnode->tree_parent;
/* Remove itself, if this is a leaf and non-exported node */
if (old_nd->tree_child_first == NULL &&
!TREE_EXPORTED(old_nd)) {
! tree_remove_node(old_nd);
connect_point = tnode;
}
}
/* Update the change timestamp */
if (connect_point != NULL)
! tree_update_change(connect_point, NULL);
}
/*
* Traverse backward across mountpoint from the
* root vnode of a filesystem to its mounted-on
* vnode.
*/
vnode_t *
! untraverse(vnode_t *vp)
{
vnode_t *tvp, *nextvp;
tvp = vp;
for (;;) {
! if (! (tvp->v_flag & VROOT))
break;
/* lock vfs to prevent unmount of this vfs */
vfs_lock_wait(tvp->v_vfsp);
--- 894,926 ----
tnode = tnode->tree_parent;
/* Remove itself, if this is a leaf and non-exported node */
if (old_nd->tree_child_first == NULL &&
!TREE_EXPORTED(old_nd)) {
! tree_remove_node(ne, old_nd);
connect_point = tnode;
}
}
/* Update the change timestamp */
if (connect_point != NULL)
! tree_update_change(ne, connect_point, NULL);
}
/*
* Traverse backward across mountpoint from the
* root vnode of a filesystem to its mounted-on
* vnode.
*/
vnode_t *
! untraverse(vnode_t *vp, vnode_t *zone_rootvp)
{
vnode_t *tvp, *nextvp;
tvp = vp;
for (;;) {
! if (!(tvp->v_flag & VROOT) && !VN_CMP(tvp, zone_rootvp))
break;
/* lock vfs to prevent unmount of this vfs */
vfs_lock_wait(tvp->v_vfsp);
*** 905,915 ****
return (tvp);
}
/*
* Given an exportinfo, climb up to find the exportinfo for the VROOT
! * of the filesystem.
*
* e.g. /
* |
* a (VROOT) pseudo-exportinfo
* |
--- 947,957 ----
return (tvp);
}
/*
* Given an exportinfo, climb up to find the exportinfo for the VROOT
! * (or zone root) of the filesystem.
*
* e.g. /
* |
* a (VROOT) pseudo-exportinfo
* |
*** 922,932 ****
* where c is in the same filesystem as a.
* So, get_root_export(*exportinfo_for_c) returns exportinfo_for_a
*
* If d is shared, then c will be put into a's visible list.
* Note: visible list is per filesystem and is attached to the
! * VROOT exportinfo.
*/
struct exportinfo *
get_root_export(struct exportinfo *exip)
{
treenode_t *tnode = exip->exi_tree;
--- 964,974 ----
* where c is in the same filesystem as a.
* So, get_root_export(*exportinfo_for_c) returns exportinfo_for_a
*
* If d is shared, then c will be put into a's visible list.
* Note: visible list is per filesystem and is attached to the
! * VROOT exportinfo. Returned exi does NOT have a new hold.
*/
struct exportinfo *
get_root_export(struct exportinfo *exip)
{
treenode_t *tnode = exip->exi_tree;
*** 954,969 ****
bool_t vp_is_exported;
vp_is_exported = VN_CMP(vp, exi->exi_vp);
/*
! * An exported root vnode has a sub-dir shared if it has a visible list.
! * i.e. if it does not have a visible list, then there is no node in
! * this filesystem leads to any other shared node.
*/
! if (vp_is_exported && (vp->v_flag & VROOT))
return (exi->exi_visible ? 1 : 0);
/*
* Only the exportinfo of a fs root node may have a visible list.
* Either it is a pseudo root node, or a real exported root node.
*/
--- 996,1014 ----
bool_t vp_is_exported;
vp_is_exported = VN_CMP(vp, exi->exi_vp);
/*
! * An exported root vnode has a sub-dir shared if it has a visible
! * list, i.e. if it does not have a visible list, then no node in
! * this filesystem leads to any other shared node.
*/
! ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
! if (vp_is_exported &&
! ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp))) {
return (exi->exi_visible ? 1 : 0);
+ }
/*
* Only the exportinfo of a fs root node may have a visible list.
* Either it is a pseudo root node, or a real exported root node.
*/
*** 1032,1042 ****
/*
* Only a PSEUDO node has a visible list or an exported VROOT
* node may have a visible list.
*/
! if (! PSEUDO(exi))
exi = get_root_export(exi);
/* Get the fid of the vnode */
bzero(&fid, sizeof (fid));
--- 1077,1087 ----
/*
* Only a PSEUDO node has a visible list or an exported VROOT
* node may have a visible list.
*/
! if (!PSEUDO(exi))
exi = get_root_export(exi);
/* Get the fid of the vnode */
bzero(&fid, sizeof (fid));
*** 1140,1150 ****
{
/*
* Only a PSEUDO node has a visible list or an exported VROOT
* node may have a visible list.
*/
! if (! PSEUDO(exi))
exi = get_root_export(exi);
for (*visp = exi->exi_visible; *visp != NULL; *visp = (*visp)->vis_next)
if ((u_longlong_t)ino == (*visp)->vis_ino) {
return (1);
--- 1185,1195 ----
{
/*
* Only a PSEUDO node has a visible list or an exported VROOT
* node may have a visible list.
*/
! if (!PSEUDO(exi))
exi = get_root_export(exi);
for (*visp = exi->exi_visible; *visp != NULL; *visp = (*visp)->vis_next)
if ((u_longlong_t)ino == (*visp)->vis_ino) {
return (1);
*** 1152,1178 ****
return (0);
}
/*
- * The change attribute value of the root of nfs pseudo namespace.
- *
- * The ns_root_change is protected by exported_lock because all of the treenode
- * operations are protected by exported_lock too.
- */
- static timespec_t ns_root_change;
-
- /*
* Get the change attribute from visible and returns TRUE.
* If the change value is not available returns FALSE.
*/
bool_t
nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change)
{
struct exp_visible *visp;
fid_t fid;
treenode_t *node;
/*
* First check to see if vp is export root.
*/
if (VN_CMP(vp, exi->exi_vp))
--- 1197,1216 ----
return (0);
}
/*
* Get the change attribute from visible and returns TRUE.
* If the change value is not available returns FALSE.
*/
bool_t
nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change)
{
struct exp_visible *visp;
fid_t fid;
treenode_t *node;
+ nfs_export_t *ne = nfs_get_export();
/*
* First check to see if vp is export root.
*/
if (VN_CMP(vp, exi->exi_vp))
*** 1213,1230 ****
return (FALSE);
exproot:
/* The VROOT export have its visible available through treenode */
node = exi->exi_tree;
! if (node != ns_root) {
ASSERT(node->tree_vis != NULL);
*change = node->tree_vis->vis_change;
} else {
ASSERT(node->tree_vis == NULL);
! *change = ns_root_change;
}
-
return (TRUE);
}
/*
* Update the change attribute value for a particular treenode. The change
--- 1251,1267 ----
return (FALSE);
exproot:
/* The VROOT export have its visible available through treenode */
node = exi->exi_tree;
! if (node != ne->ns_root) {
ASSERT(node->tree_vis != NULL);
*change = node->tree_vis->vis_change;
} else {
ASSERT(node->tree_vis == NULL);
! *change = ne->ns_root_change;
}
return (TRUE);
}
/*
* Update the change attribute value for a particular treenode. The change
*** 1232,1250 ****
* ns_root_change.
*
* If the change value is not supplied, the current time is used.
*/
void
! tree_update_change(treenode_t *tnode, timespec_t *change)
{
timespec_t *vis_change;
ASSERT(tnode != NULL);
! ASSERT((tnode != ns_root && tnode->tree_vis != NULL) ||
! (tnode == ns_root && tnode->tree_vis == NULL));
! vis_change = tnode == ns_root ? &ns_root_change
: &tnode->tree_vis->vis_change;
if (change != NULL)
*vis_change = *change;
else
--- 1269,1287 ----
* ns_root_change.
*
* If the change value is not supplied, the current time is used.
*/
void
! tree_update_change(nfs_export_t *ne, treenode_t *tnode, timespec_t *change)
{
timespec_t *vis_change;
ASSERT(tnode != NULL);
! ASSERT((tnode != ne->ns_root && tnode->tree_vis != NULL) ||
! (tnode == ne->ns_root && tnode->tree_vis == NULL));
! vis_change = tnode == ne->ns_root ? &ne->ns_root_change
: &tnode->tree_vis->vis_change;
if (change != NULL)
*vis_change = *change;
else