NEX-19178 Changing the NFS export path takes the SMB share offline
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Revert "NEX-19178 Changing the ZFS mountpoint property of a dataset takes the SMB share offline"
This reverts commit 35bb44b3cdee0719ce685304ca801335d5cc234e.
NEX-19178 Changing the ZFS mountpoint property of a dataset takes the SMB share offline
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Matt Barden <matt.barden@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-16219 pool import performance regression due to repeated libshare initialization
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-15937 zpool import performance degradation in filesystem sharing
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-6586 cleanup gcc warnings in libzfs_mount.c
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
2605 want to resume interrupted zfs send
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Reviewed by: Arne Jansen <sensille@gmx.net>
Approved by: Dan McDonald <danmcd@omniti.com>
6280 libzfs: unshare_one() could fail with EZFS_SHARENFSFAILED
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Gordon Ross <gwr@nexenta.com>
NEX-1557 Parallel mount during HA failover sometimes doesn't share the dataset, causing shares to go offline
SUP-647 Long failover times dominated by zpool import times trigger client-side errors
re #13594 rb4488 Lint complaints fix
re #10054 #13409 rb4387 added parallel unmount for zpool export
@@ -18,12 +18,16 @@
*
* CDDL HEADER END
*/
/*
- * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Copyright 2019 Nexenta Systems, Inc.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright 2017 Joyent, Inc.
* Copyright 2017 RackTop Systems.
*/
@@ -59,11 +63,13 @@
*
* The following functions are available for pool consumers, and will
* mount/unmount and share/unshare all datasets within pool:
*
* zpool_enable_datasets()
+ * zpool_enable_datasets_ex()
* zpool_disable_datasets()
+ * zpool_disable_datasets_ex()
*/
#include <dirent.h>
#include <dlfcn.h>
#include <errno.h>
@@ -76,10 +82,11 @@
#include <unistd.h>
#include <zone.h>
#include <sys/mntent.h>
#include <sys/mount.h>
#include <sys/stat.h>
+#include <thread_pool.h>
#include <sys/statvfs.h>
#include <libzfs.h>
#include "libzfs_impl.h"
@@ -419,11 +426,12 @@
* Unmount a single filesystem.
*/
static int
unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
{
- if (umount2(mountpoint, flags) != 0) {
+ int ret = umount2(mountpoint, flags);
+ if (ret != 0) {
zfs_error_aux(hdl, strerror(errno));
return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
mountpoint));
}
@@ -569,20 +577,20 @@
* initialized in _zfs_init_libshare() are actually present.
*/
static sa_handle_t (*_sa_init)(int);
static sa_handle_t (*_sa_init_arg)(int, void *);
+static int (*_sa_service)(sa_handle_t);
static void (*_sa_fini)(sa_handle_t);
static sa_share_t (*_sa_find_share)(sa_handle_t, char *);
static int (*_sa_enable_share)(sa_share_t, char *);
static int (*_sa_disable_share)(sa_share_t, char *);
static char *(*_sa_errorstr)(int);
static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
static boolean_t (*_sa_needs_refresh)(sa_handle_t *);
static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t);
-static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t,
- char *, char *, zprop_source_t, char *, char *, char *);
+static int (*_sa_get_zfs_share)(sa_handle_t, char *, zfs_handle_t *);
static void (*_sa_update_sharetab_ts)(sa_handle_t);
/*
* _zfs_init_libshare()
*
@@ -611,10 +619,12 @@
if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) {
_sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init");
_sa_init_arg = (sa_handle_t (*)(int, void *))dlsym(libshare,
"sa_init_arg");
_sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini");
+ _sa_service = (int (*)(sa_handle_t))dlsym(libshare,
+ "sa_service");
_sa_find_share = (sa_share_t (*)(sa_handle_t, char *))
dlsym(libshare, "sa_find_share");
_sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
"sa_enable_share");
_sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
@@ -624,33 +634,33 @@
dlsym(libshare, "sa_parse_legacy_options");
_sa_needs_refresh = (boolean_t (*)(sa_handle_t *))
dlsym(libshare, "sa_needs_refresh");
_sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t))
dlsym(libshare, "sa_get_zfs_handle");
- _sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t,
- sa_share_t, char *, char *, zprop_source_t, char *,
- char *, char *))dlsym(libshare, "sa_zfs_process_share");
+		_sa_get_zfs_share = (int (*)(sa_handle_t, char *,
+		    zfs_handle_t *))dlsym(libshare, "sa_get_zfs_share");
_sa_update_sharetab_ts = (void (*)(sa_handle_t))
dlsym(libshare, "sa_update_sharetab_ts");
if (_sa_init == NULL || _sa_init_arg == NULL ||
_sa_fini == NULL || _sa_find_share == NULL ||
_sa_enable_share == NULL || _sa_disable_share == NULL ||
_sa_errorstr == NULL || _sa_parse_legacy_options == NULL ||
_sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL ||
- _sa_zfs_process_share == NULL ||
+ _sa_get_zfs_share == NULL || _sa_service == NULL ||
_sa_update_sharetab_ts == NULL) {
_sa_init = NULL;
_sa_init_arg = NULL;
+ _sa_service = NULL;
_sa_fini = NULL;
_sa_disable_share = NULL;
_sa_enable_share = NULL;
_sa_errorstr = NULL;
_sa_parse_legacy_options = NULL;
(void) dlclose(libshare);
_sa_needs_refresh = NULL;
_sa_get_zfs_handle = NULL;
- _sa_zfs_process_share = NULL;
+ _sa_get_zfs_share = NULL;
_sa_update_sharetab_ts = NULL;
}
}
}
@@ -796,58 +806,58 @@
char sourcestr[ZFS_MAXPROPLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
sa_share_t share;
zfs_share_proto_t *curr_proto;
zprop_source_t sourcetype;
+ int service = SA_INIT_ONE_SHARE_FROM_HANDLE;
int ret;
if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
return (0);
+ /*
+	 * This function may be called in a loop from higher up the stack,
+	 * with libshare already initialized for a set of shares
+	 * (SA_INIT_SHARE_API_SELECTIVE).  zfs_init_libshare_arg() will
+	 * refresh the handle's cache if necessary; in that case we do not
+	 * want to switch to per-share initialization, so we specify
+	 * SA_INIT_SHARE_API to do a full refresh if one is required.
+ */
+ if ((hdl->libzfs_sharehdl != NULL) && (_sa_service != NULL) &&
+ (_sa_service(hdl->libzfs_sharehdl) ==
+ SA_INIT_SHARE_API_SELECTIVE)) {
+ service = SA_INIT_SHARE_API;
+ }
+
for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
/*
* Return success if there are no share options.
*/
if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
shareopts, sizeof (shareopts), &sourcetype, sourcestr,
ZFS_MAXPROPLEN, B_FALSE) != 0 ||
strcmp(shareopts, "off") == 0)
continue;
- ret = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_HANDLE,
- zhp);
+ ret = zfs_init_libshare_arg(hdl, service, zhp);
if (ret != SA_OK) {
(void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
zfs_get_name(zhp), _sa_errorstr != NULL ?
_sa_errorstr(ret) : "");
return (-1);
}
- /*
- * If the 'zoned' property is set, then zfs_is_mountable()
- * will have already bailed out if we are in the global zone.
- * But local zones cannot be NFS servers, so we ignore it for
- * local zones as well.
- */
- if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
- continue;
-
share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
if (share == NULL) {
/*
* This may be a new file system that was just
- * created so isn't in the internal cache
- * (second time through). Rather than
- * reloading the entire configuration, we can
- * assume ZFS has done the checking and it is
- * safe to add this to the internal
- * configuration.
+ * created so isn't in the internal cache.
+ * Rather than reloading the entire configuration,
+ * we can add just this one share to the cache.
*/
- if (_sa_zfs_process_share(hdl->libzfs_sharehdl,
- NULL, NULL, mountpoint,
- proto_table[*curr_proto].p_name, sourcetype,
- shareopts, sourcestr, zhp->zfs_name) != SA_OK) {
+ if ((_sa_get_zfs_share == NULL) ||
+ (_sa_get_zfs_share(hdl->libzfs_sharehdl, "zfs", zhp)
+ != SA_OK)) {
(void) zfs_error_fmt(hdl,
proto_table[*curr_proto].p_share_err,
dgettext(TEXT_DOMAIN, "cannot share '%s'"),
zfs_get_name(zhp));
return (-1);
@@ -905,26 +915,34 @@
zfs_share_proto_t proto)
{
sa_share_t share;
int err;
char *mntpt;
+ int service = SA_INIT_ONE_SHARE_FROM_NAME;
/*
* Mountpoint could get trashed if libshare calls getmntany
* which it does during API initialization, so strdup the
* value.
*/
mntpt = zfs_strdup(hdl, mountpoint);
/*
- * make sure libshare initialized, initialize everything because we
- * don't know what other unsharing may happen later. Functions up the
- * stack are allowed to initialize instead a subset of shares at the
- * time the set is known.
+	 * This function may be called in a loop from higher up the stack,
+	 * with libshare already initialized for a set of shares
+	 * (SA_INIT_SHARE_API_SELECTIVE).  zfs_init_libshare_arg() will
+	 * refresh the handle's cache if necessary; in that case we do not
+	 * want to switch to per-share initialization, so we specify
+	 * SA_INIT_SHARE_API to do a full refresh if one is required.
*/
- if ((err = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_NAME,
- (void *)name)) != SA_OK) {
+ if ((hdl->libzfs_sharehdl != NULL) && (_sa_service != NULL) &&
+ (_sa_service(hdl->libzfs_sharehdl) ==
+ SA_INIT_SHARE_API_SELECTIVE)) {
+ service = SA_INIT_SHARE_API;
+ }
+
+ err = zfs_init_libshare_arg(hdl, service, (void *)name);
+ if (err != SA_OK) {
free(mntpt); /* don't need the copy anymore */
return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
name, _sa_errorstr(err)));
}
@@ -1153,27 +1171,491 @@
return (1);
return (strcmp(zfs_get_name(a), zfs_get_name(b)));
}
-/*
- * Mount and share all datasets within the given pool. This assumes that no
- * datasets within the pool are currently mounted. Because users can create
- * complicated nested hierarchies of mountpoints, we first gather all the
- * datasets and mountpoints within the pool, and sort them by mountpoint. Once
- * we have the list of all filesystems, we iterate over them in order and mount
- * and/or share each one.
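+/*
+ * qsort(3C) comparator for mountpoint strings; the reversed operands give
+ * a descending sort, so child mountpoints sort before their parents and
+ * can be unmounted first.
+ */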
+static int
+mountpoint_compare(const void *a, const void *b)
+{
+ const char *mounta = *((char **)a);
+ const char *mountb = *((char **)b);
+
+ return (strcmp(mountb, mounta));
+}
+
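+/*
+ * A task moves through a simple pipeline:
+ * TASK_TO_PROCESS -> TASK_IN_PROCESSING -> TASK_DONE.
+ */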
+typedef enum {
+ TASK_TO_PROCESS,
+ TASK_IN_PROCESSING,
+ TASK_DONE,
+ TASK_MAX
+} task_state_t;
+
+typedef struct mount_task {
+ const char *mp;
+ zfs_handle_t *zh;
+ task_state_t state;
+ int error;
+} mount_task_t;
+
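+/*
+ * Shared work queue: q_lock serializes all state transitions, n_tasks
+ * counts the tasks that are not yet TASK_DONE, and the first fatal error
+ * is latched in error (plus error_mp or error_zh for reporting).
+ */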
+typedef struct mount_task_q {
+ pthread_mutex_t q_lock;
+ libzfs_handle_t *hdl;
+ const char *mntopts;
+ const char *error_mp;
+ zfs_handle_t *error_zh;
+ int error;
+ int q_length;
+ int n_tasks;
+ int flags;
+ mount_task_t task[1];
+} mount_task_q_t;
+
+static int
+mount_task_q_init(int argc, zfs_handle_t **handles, const char *mntopts,
+ int flags, mount_task_q_t **task)
+{
+ mount_task_q_t *task_q;
+ int i, error;
+ size_t task_q_size;
+
+ *task = NULL;
+	/* nothing to do; the caller should not have called us */
+ if (argc <= 0)
+ return (EINVAL);
+
+ /* allocate and init task_q */
+ task_q_size = sizeof (mount_task_q_t) +
+ (argc - 1) * sizeof (mount_task_t);
+	task_q = calloc(1, task_q_size);
+ if (task_q == NULL)
+ return (ENOMEM);
+
+ if ((error = pthread_mutex_init(&task_q->q_lock, NULL)) != 0) {
+ free(task_q);
+ return (error);
+ }
+ task_q->q_length = argc;
+ task_q->n_tasks = argc;
+ task_q->flags = flags;
+ task_q->mntopts = mntopts;
+
+	/* we only keep references to the caller's dataset handles */
+ for (i = 0; i < argc; ++i) {
+ task_q->task[i].zh = handles[i];
+ task_q->task[i].state = TASK_TO_PROCESS;
+ task_q->error = 0;
+ }
+
+ *task = task_q;
+ return (0);
+}
+
+static int
+umount_task_q_init(int argc, const char **argv, int flags,
+ libzfs_handle_t *hdl, mount_task_q_t **task)
+{
+ mount_task_q_t *task_q;
+ int i, error;
+ size_t task_q_size;
+
+ *task = NULL;
+	/* nothing to do; the caller should not have called us */
+ if (argc <= 0)
+ return (EINVAL);
+
+ /* allocate and init task_q */
+ task_q_size = sizeof (mount_task_q_t) +
+ (argc - 1) * sizeof (mount_task_t);
+	task_q = calloc(1, task_q_size);
+ if (task_q == NULL)
+ return (ENOMEM);
+
+ if ((error = pthread_mutex_init(&task_q->q_lock, NULL)) != 0) {
+ free(task_q);
+ return (error);
+ }
+ task_q->hdl = hdl;
+ task_q->q_length = argc;
+ task_q->n_tasks = argc;
+ task_q->flags = flags;
+
+ /* we are not going to change the strings, so no need to strdup */
+ for (i = 0; i < argc; ++i) {
+ task_q->task[i].mp = argv[i];
+ task_q->task[i].state = TASK_TO_PROCESS;
+ task_q->error = 0;
+ }
+
+ *task = task_q;
+ return (0);
+}
+
+static void
+mount_task_q_fini(mount_task_q_t *task_q)
+{
+ assert(task_q != NULL);
+ (void) pthread_mutex_destroy(&task_q->q_lock);
+ free(task_q);
+}
+
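+/*
+ * Returns nonzero when the path s1 is a proper descendant of s2,
+ * e.g. ("/a/b", "/a") matches while ("/ab", "/a") does not.
+ */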
+static int
+is_child_of(const char *s1, const char *s2)
+{
+ for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2)
+ ;
+ return (!*s2 && (*s1 == '/'));
+}
+
+static boolean_t
+task_completed(int ind, mount_task_q_t *task_q)
+{
+ return (task_q->task[ind].state == TASK_DONE);
+}
+
+static boolean_t
+task_to_process(int ind, mount_task_q_t *task_q)
+{
+ return (task_q->task[ind].state == TASK_TO_PROCESS);
+}
+
+static boolean_t
+task_in_processing(int ind, mount_task_q_t *task_q)
+{
+ return (task_q->task[ind].state == TASK_IN_PROCESSING);
+}
+
+static void
+task_next_stage(int ind, mount_task_q_t *task_q)
+{
+ /* our state machine is a pipeline */
+ task_q->task[ind].state++;
+ assert(task_q->task[ind].state < TASK_MAX);
+}
+
+static boolean_t
+task_state_valid(int ind, mount_task_q_t *task_q)
+{
+ /* our state machine is a pipeline */
+ return (task_q->task[ind].state < TASK_MAX);
+}
+
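+/*
+ * Mountpoints are sorted in descending order, so a still-mounted child
+ * of task[ind] can only appear at a lower index.
+ */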
+static boolean_t
+child_umount_pending(int ind, mount_task_q_t *task_q)
+{
+ int i;
+ for (i = ind-1; i >= 0; --i) {
+ assert(task_state_valid(i, task_q));
+ if ((task_q->task[i].state != TASK_DONE) &&
+ is_child_of(task_q->task[i].mp, task_q->task[ind].mp))
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
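+/*
+ * Dataset handles are sorted by name in ascending order, so a
+ * not-yet-mounted ancestor of task[ind] can only appear at a lower index.
+ */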
+static boolean_t
+parent_mount_pending(int ind, mount_task_q_t *task_q)
+{
+ int i;
+ for (i = ind-1; i >= 0; --i) {
+ assert(task_state_valid(i, task_q));
+ if ((task_q->task[i].state != TASK_DONE) &&
+ is_child_of(task_q->task[ind].zh->zfs_name,
+ task_q->task[i].zh->zfs_name))
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
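+/*
+ * Thread-pool worker: picks the first queued task with no child
+ * mountpoints left to unmount, calls umount2() outside the queue lock,
+ * and latches the first failure, which stops all workers.
+ */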
+static void
+unmounter(void *arg)
+{
+ mount_task_q_t *task_q = (mount_task_q_t *)arg;
+ int error = 0, done = 0;
+
+ assert(task_q != NULL);
+ if (task_q == NULL)
+ return;
+
+ while (!error && !done) {
+ mount_task_t *task;
+ int i, t, umount_err, flags, q_error;
+
+ if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
+ break; /* Out of while() loop */
+
+ if (task_q->error || task_q->n_tasks == 0) {
+ (void) pthread_mutex_unlock(&task_q->q_lock);
+ break; /* Out of while() loop */
+ }
+
+ /* Find task ready for processing */
+ for (i = 0, task = NULL, t = -1; i < task_q->q_length; ++i) {
+ if (task_q->error) {
+ /* Fatal error, stop processing */
+ done = 1;
+ break; /* Out of for() loop */
+ }
+
+ if (task_completed(i, task_q))
+ continue; /* for() loop */
+
+ if (task_to_process(i, task_q)) {
+ /*
+ * Cannot umount if some children are still
+ * mounted; come back later
*/
-#pragma weak zpool_mount_datasets = zpool_enable_datasets
+ if ((child_umount_pending(i, task_q)))
+ continue; /* for() loop */
+ /* Should be OK to unmount now */
+ task_next_stage(i, task_q);
+ task = &task_q->task[i];
+ t = i;
+ break; /* Out of for() loop */
+ }
+
+ /* Otherwise, the task is already in processing */
+ assert(task_in_processing(i, task_q));
+ }
+
+ flags = task_q->flags;
+
+ error = pthread_mutex_unlock(&task_q->q_lock);
+
+ if (done || (task == NULL) || error || task_q->error)
+ break; /* Out of while() loop */
+
+ umount_err = umount2(task->mp, flags);
+ q_error = errno;
+
+ if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
+ break; /* Out of while() loop */
+
+ /* done processing */
+ assert(t >= 0 && t < task_q->q_length);
+ task_next_stage(t, task_q);
+ assert(task_completed(t, task_q));
+ task_q->n_tasks--;
+
+ if (umount_err) {
+ /*
+ * umount2() failed, cannot be busy because of mounted
+ * children - we have checked above, so it is fatal
+ */
+ assert(child_umount_pending(t, task_q) == B_FALSE);
+ task->error = q_error;
+ if (!task_q->error) {
+ task_q->error = task->error;
+ task_q->error_mp = task->mp;
+ }
+ done = 1;
+ }
+
+ if ((error = pthread_mutex_unlock(&task_q->q_lock)) != 0)
+ break; /* Out of while() loop */
+ }
+}
+
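+/*
+ * Thread-pool worker, mirror image of unmounter(): picks the first queued
+ * task whose ancestors are already mounted and calls zfs_mount() outside
+ * the queue lock.
+ */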
+static void
+mounter(void *arg)
+{
+ mount_task_q_t *task_q = (mount_task_q_t *)arg;
+ int error = 0, done = 0;
+
+ assert(task_q != NULL);
+ if (task_q == NULL)
+ return;
+
+ while (!error && !done) {
+ mount_task_t *task;
+ int i, t, mount_err, flags, q_error;
+ const char *mntopts;
+
+ if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
+ break; /* Out of while() loop */
+
+ if (task_q->error || task_q->n_tasks == 0) {
+ (void) pthread_mutex_unlock(&task_q->q_lock);
+ break; /* Out of while() loop */
+ }
+
+ /* Find task ready for processing */
+ for (i = 0, task = NULL, t = -1; i < task_q->q_length; ++i) {
+ if (task_q->error) {
+ /* Fatal error, stop processing */
+ done = 1;
+ break; /* Out of for() loop */
+ }
+
+ if (task_completed(i, task_q))
+ continue; /* for() loop */
+
+ if (task_to_process(i, task_q)) {
+ /*
+ * Cannot mount if some parents are not
+ * mounted yet; come back later
+ */
+ if ((parent_mount_pending(i, task_q)))
+ continue; /* for() loop */
+ /* Should be OK to mount now */
+ task_next_stage(i, task_q);
+ task = &task_q->task[i];
+ t = i;
+ break; /* Out of for() loop */
+ }
+
+ /* Otherwise, the task is already in processing */
+ assert(task_in_processing(i, task_q));
+ }
+
+ flags = task_q->flags;
+ mntopts = task_q->mntopts;
+
+ error = pthread_mutex_unlock(&task_q->q_lock);
+
+ if (done || (task == NULL) || error || task_q->error)
+ break; /* Out of while() loop */
+
+ mount_err = zfs_mount(task->zh, mntopts, flags);
+ q_error = errno;
+
+ if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
+ break; /* Out of while() loop */
+
+ /* done processing */
+ assert(t >= 0 && t < task_q->q_length);
+ task_next_stage(t, task_q);
+ assert(task_completed(t, task_q));
+ task_q->n_tasks--;
+
+ if (mount_err) {
+ task->error = q_error;
+ if (!task_q->error) {
+ task_q->error = task->error;
+ task_q->error_zh = task->zh;
+ }
+ done = 1;
+ }
+
+ if ((error = pthread_mutex_unlock(&task_q->q_lock)) != 0)
+ break; /* Out of while() loop */
+ }
+}
+
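+/*
+ * Unmount the given mountpoints in parallel, using at most
+ * MIN(argc, n_threads, THREADS_HARD_LIMIT) worker threads.
+ */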
+#define	THREADS_HARD_LIMIT	128
+
+int
+parallel_unmount(libzfs_handle_t *hdl, int argc, const char **argv,
+    int flags, int n_threads)
+{
+ mount_task_q_t *task_queue = NULL;
+ int i, error;
+ tpool_t *t;
+
+ if (argc == 0)
+ return (0);
+
+ if ((error = umount_task_q_init(argc, argv, flags, hdl, &task_queue))
+ != 0) {
+ assert(task_queue == NULL);
+ return (error);
+ }
+
+ if (n_threads > argc)
+ n_threads = argc;
+
+ if (n_threads > THREADS_HARD_LIMIT)
+ n_threads = THREADS_HARD_LIMIT;
+
+ t = tpool_create(1, n_threads, 0, NULL);
+
+ for (i = 0; i < n_threads; ++i)
+ (void) tpool_dispatch(t, unmounter, task_queue);
+
+ tpool_wait(t);
+ tpool_destroy(t);
+
+ if (task_queue->error) {
+ /*
+		 * Report the failure through the libzfs error facilities.
+ */
+ zfs_error_aux(hdl,
+ strerror(error ? error : task_queue->error));
+ error = zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
+ dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
+ error ? "datasets" : task_queue->error_mp);
+ }
+ if (task_queue)
+ mount_task_q_fini(task_queue);
+
+ return (error);
+}
+
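+/*
+ * Mount the datasets collected in 'cb' in parallel; good[i] is nonzero
+ * unless the mount attempt for dataset i failed.
+ */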
+int
+parallel_mount(get_all_cb_t *cb, int *good, const char *mntopts,
+ int flags, int n_threads)
+{
+ int i, error = 0;
+ mount_task_q_t *task_queue = NULL;
+ tpool_t *t;
+
+ if (cb->cb_used == 0)
+ return (0);
+
+ if (n_threads > cb->cb_used)
+ n_threads = cb->cb_used;
+
+ if ((error = mount_task_q_init(cb->cb_used, cb->cb_handles,
+ mntopts, flags, &task_queue)) != 0) {
+ assert(task_queue == NULL);
+ return (error);
+ }
+
+ t = tpool_create(1, n_threads, 0, NULL);
+
+ for (i = 0; i < n_threads; ++i)
+ (void) tpool_dispatch(t, mounter, task_queue);
+
+ tpool_wait(t);
+ for (i = 0; i < cb->cb_used; ++i) {
+ good[i] = !task_queue->task[i].error;
+ if (!good[i]) {
+ zfs_handle_t *hdl = task_queue->error_zh;
+ zfs_error_aux(hdl->zfs_hdl,
+ strerror(task_queue->task[i].error));
+ (void) zfs_error_fmt(hdl->zfs_hdl, EZFS_MOUNTFAILED,
+ dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+ task_queue->task[i].zh->zfs_name);
+ }
+ }
+ tpool_destroy(t);
+
+ if (task_queue->error) {
+ zfs_handle_t *hdl = task_queue->error_zh;
+ /*
+		 * Report the failure through the libzfs error facilities.
+ */
+ zfs_error_aux(hdl->zfs_hdl,
+ strerror(error ? error : task_queue->error));
+ error = zfs_error_fmt(hdl->zfs_hdl, EZFS_MOUNTFAILED,
+ dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+ error ? "datasets" : hdl->zfs_name);
+ }
+ if (task_queue)
+ mount_task_q_fini(task_queue);
+
+ return (error);
+}
+
int
-zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+zpool_enable_datasets_ex(zpool_handle_t *zhp, const char *mntopts, int flags,
+ int n_threads)
{
get_all_cb_t cb = { 0 };
libzfs_handle_t *hdl = zhp->zpool_hdl;
zfs_handle_t *zfsp;
int i, ret = -1;
int *good;
+ sa_init_selective_arg_t sharearg;
/*
* Gather all non-snap datasets within the pool.
*/
if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL)
@@ -1195,18 +1677,36 @@
if ((good = zfs_alloc(zhp->zpool_hdl,
cb.cb_used * sizeof (int))) == NULL)
goto out;
ret = 0;
+ if (n_threads < 2) {
for (i = 0; i < cb.cb_used; i++) {
if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0)
ret = -1;
else
good[i] = 1;
}
+ } else {
+ ret = parallel_mount(&cb, good, mntopts, flags, n_threads);
+ }
/*
+	 * Initialize libshare with SA_INIT_SHARE_API_SELECTIVE here
+	 * to avoid unnecessary load/unload of the libshare API
+	 * per shared dataset downstream.
+ */
+ sharearg.zhandle_arr = cb.cb_handles;
+ sharearg.zhandle_len = cb.cb_used;
+ ret = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE,
+ &sharearg);
+ if (ret != 0) {
+ free(good);
+ goto out;
+ }
+
+ /*
* Then share all the ones that need to be shared. This needs
* to be a separate pass in order to avoid excessive reloading
* of the configuration. Good should never be NULL since
* zfs_alloc is supposed to exit if memory isn't available.
*/
@@ -1223,30 +1723,12 @@
free(cb.cb_handles);
return (ret);
}
-static int
-mountpoint_compare(const void *a, const void *b)
-{
- const char *mounta = *((char **)a);
- const char *mountb = *((char **)b);
-
- return (strcmp(mountb, mounta));
-}
-
-/* alias for 2002/240 */
-#pragma weak zpool_unmount_datasets = zpool_disable_datasets
-/*
- * Unshare and unmount all datasets within the given pool. We don't want to
- * rely on traversing the DSL to discover the filesystems within the pool,
- * because this may be expensive (if not all of them are mounted), and can fail
- * arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and
- * gather all the filesystems that are currently mounted.
- */
int
-zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
+zpool_disable_datasets_ex(zpool_handle_t *zhp, boolean_t force, int n_threads)
{
int used, alloc;
struct mnttab entry;
size_t namelen;
char **mountpoints = NULL;
@@ -1344,30 +1826,34 @@
for (i = 0; i < used; i++) {
zfs_share_proto_t *curr_proto;
for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
curr_proto++) {
if (is_shared(hdl, mountpoints[i], *curr_proto) &&
- unshare_one(hdl, mountpoints[i],
- mountpoints[i], *curr_proto) != 0)
+ unshare_one(hdl, mountpoints[i], mountpoints[i],
+ *curr_proto) != 0)
goto out;
}
}
/*
* Now unmount everything, removing the underlying directories as
* appropriate.
*/
+ if (n_threads < 2) {
for (i = 0; i < used; i++) {
if (unmount_one(hdl, mountpoints[i], flags) != 0)
goto out;
}
-
+ } else {
+ if (parallel_unmount(hdl, used, (const char **)mountpoints,
+ flags, n_threads) != 0)
+ goto out;
+ }
for (i = 0; i < used; i++) {
if (datasets[i])
remove_mountpoint(datasets[i]);
}
-
ret = 0;
out:
for (i = 0; i < used; i++) {
if (datasets[i])
zfs_close(datasets[i]);
@@ -1375,6 +1861,36 @@
}
free(datasets);
free(mountpoints);
return (ret);
+}
+
+/*
+ * Mount and share all datasets within the given pool. This assumes that no
+ * datasets within the pool are currently mounted. Because users can create
+ * complicated nested hierarchies of mountpoints, we first gather all the
+ * datasets and mountpoints within the pool, and sort them by mountpoint. Once
+ * we have the list of all filesystems, we iterate over them in order and mount
+ * and/or share each one.
+ */
+#pragma weak zpool_mount_datasets = zpool_enable_datasets
+int
+zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+{
+ return (zpool_enable_datasets_ex(zhp, mntopts, flags, 1));
+}
+
+/* alias for 2002/240 */
+#pragma weak zpool_unmount_datasets = zpool_disable_datasets
+/*
+ * Unshare and unmount all datasets within the given pool. We don't want to
+ * rely on traversing the DSL to discover the filesystems within the pool,
+ * because this may be expensive (if not all of them are mounted), and can fail
+ * arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and
+ * gather all the filesystems that are currently mounted.
+ */
+int
+zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
+{
+ return (zpool_disable_datasets_ex(zhp, force, 1));
}
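
Usage note: the new zpool_enable_datasets_ex()/zpool_disable_datasets_ex()
entry points take an explicit worker-thread count, and any value below 2
falls back to the old serial path. A minimal, untested consumer sketch
(the pool name "tank" and the thread count of 8 are illustrative, not part
of this change):

	#include <libzfs.h>

	int
	main(void)
	{
		libzfs_handle_t *hdl;
		zpool_handle_t *zhp;
		int ret = -1;

		if ((hdl = libzfs_init()) == NULL)
			return (1);
		if ((zhp = zpool_open(hdl, "tank")) != NULL) {
			/* Mount and share with up to 8 worker threads. */
			ret = zpool_enable_datasets_ex(zhp, NULL, 0, 8);
			if (ret == 0) {
				/* ... use the pool ... */
				/* Unshare and unmount, again in parallel. */
				ret = zpool_disable_datasets_ex(zhp,
				    B_FALSE, 8);
			}
			zpool_close(zhp);
		}
		libzfs_fini(hdl);
		return (ret == 0 ? 0 : 1);
	}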