Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16

*** 18,29 **** * * CDDL HEADER END */ /* - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, Joyent, Inc. */ #include <sys/systm.h> --- 18,32 ---- * * CDDL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + */ + + /* + * Copyright 2018 Nexenta Systems, Inc. * Copyright (c) 2015, Joyent, Inc. */ #include <sys/systm.h>
*** 137,169 **** * directories is added to filter lookup and readdir results * to only contain dirnames which lead to descendant shares. * * A visible list has a per-file-system scope. Any exportinfo * struct (real or pseudo) can have a visible list as long as ! * a) its export root is VROOT * b) a descendant of the export root is shared */ struct exportinfo * ! pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head, ! struct exportdata *exdata) { struct exportinfo *exi; struct exportdata *kex; fsid_t fsid; int vpathlen; int i; ! ASSERT(RW_WRITE_HELD(&exported_lock)); fsid = vp->v_vfsp->vfs_fsid; exi = kmem_zalloc(sizeof (*exi), KM_SLEEP); exi->exi_fsid = fsid; exi->exi_fid = *fid; exi->exi_vp = vp; VN_HOLD(exi->exi_vp); exi->exi_visible = vis_head; exi->exi_count = 1; exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag & VSW_VOLATILEDEV) ? 1 : 0; mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL); /* --- 140,173 ---- * directories is added to filter lookup and readdir results * to only contain dirnames which lead to descendant shares. * * A visible list has a per-file-system scope. Any exportinfo * struct (real or pseudo) can have a visible list as long as ! * a) its export root is VROOT, or is the zone's root for in-zone NFS service * b) a descendant of the export root is shared */ struct exportinfo * ! pseudo_exportfs(nfs_export_t *ne, vnode_t *vp, fid_t *fid, ! struct exp_visible *vis_head, struct exportdata *exdata) { struct exportinfo *exi; struct exportdata *kex; fsid_t fsid; int vpathlen; int i; ! ASSERT(RW_WRITE_HELD(&ne->exported_lock)); fsid = vp->v_vfsp->vfs_fsid; exi = kmem_zalloc(sizeof (*exi), KM_SLEEP); exi->exi_fsid = fsid; exi->exi_fid = *fid; exi->exi_vp = vp; VN_HOLD(exi->exi_vp); exi->exi_visible = vis_head; exi->exi_count = 1; + exi->exi_zoneid = ne->ne_globals->nfs_zoneid; exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag & VSW_VOLATILEDEV) ? 1 : 0; mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL); /*
*** 203,214 **** rw_init(&exi->exi_cache_lock, NULL, RW_DEFAULT, NULL); /* * Insert the new entry at the front of the export list */ ! export_link(exi); return (exi); } /* * Free a list of visible directories --- 207,226 ---- rw_init(&exi->exi_cache_lock, NULL, RW_DEFAULT, NULL); /* * Insert the new entry at the front of the export list */ ! export_link(ne, exi); + /* + * Initialize exi_id and exi_kstats + */ + mutex_enter(&nfs_exi_id_lock); + exi->exi_id = exi_id_get_next(); + avl_add(&exi_id_tree, exi); + mutex_exit(&nfs_exi_id_lock); + return (exi); } /* * Free a list of visible directories
*** 279,296 **** * Removes node from the tree and frees the treenode struct. * Does not free structures pointed by tree_exi and tree_vis, * they should be already freed. */ static void ! tree_remove_node(treenode_t *node) { treenode_t *parent = node->tree_parent; treenode_t *s; /* s for sibling */ if (parent == NULL) { kmem_free(node, sizeof (*node)); ! ns_root = NULL; return; } /* This node is first child */ if (parent->tree_child_first == node) { parent->tree_child_first = node->tree_sibling; --- 291,308 ---- * Removes node from the tree and frees the treenode struct. * Does not free structures pointed by tree_exi and tree_vis, * they should be already freed. */ static void ! tree_remove_node(nfs_export_t *ne, treenode_t *node) { treenode_t *parent = node->tree_parent; treenode_t *s; /* s for sibling */ if (parent == NULL) { kmem_free(node, sizeof (*node)); ! ne->ns_root = NULL; return; } /* This node is first child */ if (parent->tree_child_first == node) { parent->tree_child_first = node->tree_sibling;
*** 435,444 **** --- 447,457 ---- more_visible(struct exportinfo *exi, treenode_t *tree_head) { struct exp_visible *vp1, *vp2, *vis_head, *tail, *next; int found; treenode_t *child, *curr, *connect_point; + nfs_export_t *ne = nfs_get_export(); vis_head = tree_head->tree_vis; connect_point = exi->exi_tree; /*
*** 448,458 **** if (exi->exi_visible == NULL) { tree_add_child(connect_point, tree_head); exi->exi_visible = vis_head; /* Update the change timestamp */ ! tree_update_change(connect_point, &vis_head->vis_change); return; } /* The outer loop traverses the supplied list. */ --- 461,471 ---- if (exi->exi_visible == NULL) { tree_add_child(connect_point, tree_head); exi->exi_visible = vis_head; /* Update the change timestamp */ ! tree_update_change(ne, connect_point, &vis_head->vis_change); return; } /* The outer loop traverses the supplied list. */
*** 508,518 **** connect_point = child; } else { /* Branching */ tree_add_child(connect_point, curr); /* Update the change timestamp */ ! tree_update_change(connect_point, &curr->tree_vis->vis_change); connect_point = NULL; } } --- 521,531 ---- connect_point = child; } else { /* Branching */ tree_add_child(connect_point, curr); /* Update the change timestamp */ ! tree_update_change(ne, connect_point, &curr->tree_vis->vis_change); connect_point = NULL; } }
*** 625,636 **** struct exp_visible *visp; struct exp_visible *vis_head = NULL; struct vattr va; treenode_t *tree_head = NULL; timespec_t now; ! ASSERT(RW_WRITE_HELD(&exported_lock)); gethrestime(&now); vp = exip->exi_vp; VN_HOLD(vp); --- 638,652 ---- struct exp_visible *visp; struct exp_visible *vis_head = NULL; struct vattr va; treenode_t *tree_head = NULL; timespec_t now; + nfs_export_t *ne; ! ne = exip->exi_ne; ! ASSERT3P(ne, ==, nfs_get_export()); /* curzone reality check */ ! ASSERT(RW_WRITE_HELD(&ne->exported_lock)); gethrestime(&now); vp = exip->exi_vp; VN_HOLD(vp);
*** 642,656 **** fid.fid_len = MAXFIDSZ; error = vop_fid_pseudo(vp, &fid); if (error) break; /* ! * The root of the file system needs special handling */ ! if (vp->v_flag & VROOT) { ! if (! exportdir) { struct exportinfo *exi; /* * Check if this VROOT dir is already exported. * If so, then attach the pseudonodes. If not, --- 658,675 ---- fid.fid_len = MAXFIDSZ; error = vop_fid_pseudo(vp, &fid); if (error) break; + /* XXX KEBE ASKS DO WE NEED THIS?!? */ + ASSERT3U(exip->exi_zoneid, ==, curzone->zone_id); /* ! * The root of the file system, or the zone's root for ! * in-zone NFS service needs special handling */ ! if (vp->v_flag & VROOT || vp == EXI_TO_ZONEROOTVP(exip)) { ! if (!exportdir) { struct exportinfo *exi; /* * Check if this VROOT dir is already exported. * If so, then attach the pseudonodes. If not,
*** 675,710 **** * Found the root directory of a filesystem * that isn't exported. Need to export * this as a pseudo export so that an NFS v4 * client can do lookups in it. */ ! new_exi = pseudo_exportfs(vp, &fid, vis_head, ! NULL); vis_head = NULL; } ! if (VN_CMP(vp, rootdir)) { /* at system root */ /* * If sharing "/", new_exi is shared exportinfo * (exip). Otherwise, new_exi is exportinfo * created by pseudo_exportfs() above. */ ! ns_root = tree_prepend_node(tree_head, NULL, new_exi); /* Update the change timestamp */ ! tree_update_change(ns_root, &now); break; } /* * Traverse across the mountpoint and continue the * climb on the mounted-on filesystem. */ ! vp = untraverse(vp); exportdir = 0; continue; } /* --- 694,729 ---- * Found the root directory of a filesystem * that isn't exported. Need to export * this as a pseudo export so that an NFS v4 * client can do lookups in it. */ ! new_exi = pseudo_exportfs(ne, vp, &fid, ! vis_head, NULL); vis_head = NULL; } ! if (VN_IS_CURZONEROOT(vp)) { /* at system root */ /* * If sharing "/", new_exi is shared exportinfo * (exip). Otherwise, new_exi is exportinfo * created by pseudo_exportfs() above. */ ! ne->ns_root = tree_prepend_node(tree_head, NULL, new_exi); /* Update the change timestamp */ ! tree_update_change(ne, ne->ns_root, &now); break; } /* * Traverse across the mountpoint and continue the * climb on the mounted-on filesystem. */ ! vp = untraverse(vp, ne->exi_root->exi_vp); exportdir = 0; continue; } /*
*** 786,796 **** while (tree_head) { treenode_t *t2 = tree_head; exportinfo_t *e = tree_head->tree_exi; /* exip will be freed in exportfs() */ if (e && e != exip) { ! export_unlink(e); exi_rele(e); } tree_head = tree_head->tree_child_first; kmem_free(t2, sizeof (*t2)); } --- 805,818 ---- while (tree_head) { treenode_t *t2 = tree_head; exportinfo_t *e = tree_head->tree_exi; /* exip will be freed in exportfs() */ if (e && e != exip) { ! mutex_enter(&nfs_exi_id_lock); ! avl_remove(&exi_id_tree, e); ! mutex_exit(&nfs_exi_id_lock); ! export_unlink(ne, e); exi_rele(e); } tree_head = tree_head->tree_child_first; kmem_free(t2, sizeof (*t2)); }
*** 807,827 **** * * Deleting of nodes will start only if the unshared * node was a leaf node. * Deleting of nodes will finish when we reach a node which * has children or is a real export, then we might still need ! * to continue releasing visibles, until we reach VROOT node. */ void ! treeclimb_unexport(struct exportinfo *exip) { treenode_t *tnode, *old_nd; treenode_t *connect_point = NULL; ! ASSERT(RW_WRITE_HELD(&exported_lock)); tnode = exip->exi_tree; /* * The unshared exportinfo was unlinked in unexport(). * Zeroing tree_exi ensures that we will skip it. */ tnode->tree_exi = NULL; --- 829,862 ---- * * Deleting of nodes will start only if the unshared * node was a leaf node. * Deleting of nodes will finish when we reach a node which * has children or is a real export, then we might still need ! * to continue releasing visibles, until we reach VROOT or zone's root node. */ void ! treeclimb_unexport(nfs_export_t *ne, struct exportinfo *exip) { treenode_t *tnode, *old_nd; treenode_t *connect_point = NULL; ! ASSERT(RW_WRITE_HELD(&ne->exported_lock)); ! ASSERT(curzone->zone_id == exip->exi_zoneid || ! curzone->zone_id == global_zone->zone_id); + /* + * exi_tree can be null for the zone root + * which means we're already at the "top" + * and there's nothing more to "climb". + */ tnode = exip->exi_tree; + if (tnode == NULL) { + /* Should only happen for... */ + ASSERT(exip == ne->exi_root); + return; + } + /* * The unshared exportinfo was unlinked in unexport(). * Zeroing tree_exi ensures that we will skip it. */ tnode->tree_exi = NULL;
*** 829,848 **** if (tnode->tree_vis != NULL) /* system root has tree_vis == NULL */ tnode->tree_vis->vis_exported = 0; while (tnode != NULL) { ! /* Stop at VROOT node which is exported or has child */ if (TREE_ROOT(tnode) && (TREE_EXPORTED(tnode) || tnode->tree_child_first != NULL)) break; /* Release pseudo export if it has no child */ if (TREE_ROOT(tnode) && !TREE_EXPORTED(tnode) && tnode->tree_child_first == NULL) { ! export_unlink(tnode->tree_exi); exi_rele(tnode->tree_exi); } /* Release visible in parent's exportinfo */ if (tnode->tree_vis != NULL) less_visible(vis2exi(tnode), tnode->tree_vis); --- 864,890 ---- if (tnode->tree_vis != NULL) /* system root has tree_vis == NULL */ tnode->tree_vis->vis_exported = 0; while (tnode != NULL) { ! /* ! * Stop at VROOT (or zone root) node which is exported or has ! * child. ! */ if (TREE_ROOT(tnode) && (TREE_EXPORTED(tnode) || tnode->tree_child_first != NULL)) break; /* Release pseudo export if it has no child */ if (TREE_ROOT(tnode) && !TREE_EXPORTED(tnode) && tnode->tree_child_first == NULL) { ! mutex_enter(&nfs_exi_id_lock); ! avl_remove(&exi_id_tree, tnode->tree_exi); ! mutex_exit(&nfs_exi_id_lock); ! export_unlink(ne, tnode->tree_exi); exi_rele(tnode->tree_exi); + tnode->tree_exi = NULL; } /* Release visible in parent's exportinfo */ if (tnode->tree_vis != NULL) less_visible(vis2exi(tnode), tnode->tree_vis);
*** 852,884 **** tnode = tnode->tree_parent; /* Remove itself, if this is a leaf and non-exported node */ if (old_nd->tree_child_first == NULL && !TREE_EXPORTED(old_nd)) { ! tree_remove_node(old_nd); connect_point = tnode; } } /* Update the change timestamp */ if (connect_point != NULL) ! tree_update_change(connect_point, NULL); } /* * Traverse backward across mountpoint from the * root vnode of a filesystem to its mounted-on * vnode. */ vnode_t * ! untraverse(vnode_t *vp) { vnode_t *tvp, *nextvp; tvp = vp; for (;;) { ! if (! (tvp->v_flag & VROOT)) break; /* lock vfs to prevent unmount of this vfs */ vfs_lock_wait(tvp->v_vfsp); --- 894,926 ---- tnode = tnode->tree_parent; /* Remove itself, if this is a leaf and non-exported node */ if (old_nd->tree_child_first == NULL && !TREE_EXPORTED(old_nd)) { ! tree_remove_node(ne, old_nd); connect_point = tnode; } } /* Update the change timestamp */ if (connect_point != NULL) ! tree_update_change(ne, connect_point, NULL); } /* * Traverse backward across mountpoint from the * root vnode of a filesystem to its mounted-on * vnode. */ vnode_t * ! untraverse(vnode_t *vp, vnode_t *zone_rootvp) { vnode_t *tvp, *nextvp; tvp = vp; for (;;) { ! if (!(tvp->v_flag & VROOT) && !VN_CMP(tvp, zone_rootvp)) break; /* lock vfs to prevent unmount of this vfs */ vfs_lock_wait(tvp->v_vfsp);
*** 905,915 **** return (tvp); } /* * Given an exportinfo, climb up to find the exportinfo for the VROOT ! * of the filesystem. * * e.g. / * | * a (VROOT) pseudo-exportinfo * | --- 947,957 ---- return (tvp); } /* * Given an exportinfo, climb up to find the exportinfo for the VROOT ! * (or zone root) of the filesystem. * * e.g. / * | * a (VROOT) pseudo-exportinfo * |
*** 922,932 **** * where c is in the same filesystem as a. * So, get_root_export(*exportinfo_for_c) returns exportinfo_for_a * * If d is shared, then c will be put into a's visible list. * Note: visible list is per filesystem and is attached to the ! * VROOT exportinfo. */ struct exportinfo * get_root_export(struct exportinfo *exip) { treenode_t *tnode = exip->exi_tree; --- 964,974 ---- * where c is in the same filesystem as a. * So, get_root_export(*exportinfo_for_c) returns exportinfo_for_a * * If d is shared, then c will be put into a's visible list. * Note: visible list is per filesystem and is attached to the ! * VROOT exportinfo. Returned exi does NOT have a new hold. */ struct exportinfo * get_root_export(struct exportinfo *exip) { treenode_t *tnode = exip->exi_tree;
*** 954,969 **** bool_t vp_is_exported; vp_is_exported = VN_CMP(vp, exi->exi_vp); /* ! * An exported root vnode has a sub-dir shared if it has a visible list. ! * i.e. if it does not have a visible list, then there is no node in ! * this filesystem leads to any other shared node. */ ! if (vp_is_exported && (vp->v_flag & VROOT)) return (exi->exi_visible ? 1 : 0); /* * Only the exportinfo of a fs root node may have a visible list. * Either it is a pseudo root node, or a real exported root node. */ --- 996,1014 ---- bool_t vp_is_exported; vp_is_exported = VN_CMP(vp, exi->exi_vp); /* ! * An exported root vnode has a sub-dir shared if it has a visible ! * list. i.e. if it does not have a visible list, then there is no ! * node in this filesystem leads to any other shared node. */ ! ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid); ! if (vp_is_exported && ! ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp))) { return (exi->exi_visible ? 1 : 0); + } /* * Only the exportinfo of a fs root node may have a visible list. * Either it is a pseudo root node, or a real exported root node. */
*** 1032,1042 **** /* * Only a PSEUDO node has a visible list or an exported VROOT * node may have a visible list. */ ! if (! PSEUDO(exi)) exi = get_root_export(exi); /* Get the fid of the vnode */ bzero(&fid, sizeof (fid)); --- 1077,1087 ---- /* * Only a PSEUDO node has a visible list or an exported VROOT * node may have a visible list. */ ! if (!PSEUDO(exi)) exi = get_root_export(exi); /* Get the fid of the vnode */ bzero(&fid, sizeof (fid));
*** 1140,1150 **** { /* * Only a PSEUDO node has a visible list or an exported VROOT * node may have a visible list. */ ! if (! PSEUDO(exi)) exi = get_root_export(exi); for (*visp = exi->exi_visible; *visp != NULL; *visp = (*visp)->vis_next) if ((u_longlong_t)ino == (*visp)->vis_ino) { return (1); --- 1185,1195 ---- { /* * Only a PSEUDO node has a visible list or an exported VROOT * node may have a visible list. */ ! if (!PSEUDO(exi)) exi = get_root_export(exi); for (*visp = exi->exi_visible; *visp != NULL; *visp = (*visp)->vis_next) if ((u_longlong_t)ino == (*visp)->vis_ino) { return (1);
*** 1152,1178 **** return (0); } /* - * The change attribute value of the root of nfs pseudo namespace. - * - * The ns_root_change is protected by exported_lock because all of the treenode - * operations are protected by exported_lock too. - */ - static timespec_t ns_root_change; - - /* * Get the change attribute from visible and returns TRUE. * If the change value is not available returns FALSE. */ bool_t nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change) { struct exp_visible *visp; fid_t fid; treenode_t *node; /* * First check to see if vp is export root. */ if (VN_CMP(vp, exi->exi_vp)) --- 1197,1216 ---- return (0); } /* * Get the change attribute from visible and returns TRUE. * If the change value is not available returns FALSE. */ bool_t nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change) { struct exp_visible *visp; fid_t fid; treenode_t *node; + nfs_export_t *ne = nfs_get_export(); /* * First check to see if vp is export root. */ if (VN_CMP(vp, exi->exi_vp))
*** 1213,1230 **** return (FALSE); exproot: /* The VROOT export have its visible available through treenode */ node = exi->exi_tree; ! if (node != ns_root) { ASSERT(node->tree_vis != NULL); *change = node->tree_vis->vis_change; } else { ASSERT(node->tree_vis == NULL); ! *change = ns_root_change; } - return (TRUE); } /* * Update the change attribute value for a particular treenode. The change --- 1251,1267 ---- return (FALSE); exproot: /* The VROOT export have its visible available through treenode */ node = exi->exi_tree; ! if (node != ne->ns_root) { ASSERT(node->tree_vis != NULL); *change = node->tree_vis->vis_change; } else { ASSERT(node->tree_vis == NULL); ! *change = ne->ns_root_change; } return (TRUE); } /* * Update the change attribute value for a particular treenode. The change
*** 1232,1250 **** * ns_root_change. * * If the change value is not supplied, the current time is used. */ void ! tree_update_change(treenode_t *tnode, timespec_t *change) { timespec_t *vis_change; ASSERT(tnode != NULL); ! ASSERT((tnode != ns_root && tnode->tree_vis != NULL) || ! (tnode == ns_root && tnode->tree_vis == NULL)); ! vis_change = tnode == ns_root ? &ns_root_change : &tnode->tree_vis->vis_change; if (change != NULL) *vis_change = *change; else --- 1269,1287 ---- * ns_root_change. * * If the change value is not supplied, the current time is used. */ void ! tree_update_change(nfs_export_t *ne, treenode_t *tnode, timespec_t *change) { timespec_t *vis_change; ASSERT(tnode != NULL); ! ASSERT((tnode != ne->ns_root && tnode->tree_vis != NULL) || ! (tnode == ne->ns_root && tnode->tree_vis == NULL)); ! vis_change = tnode == ne->ns_root ? &ne->ns_root_change : &tnode->tree_vis->vis_change; if (change != NULL) *vis_change = *change; else