Print this page
NEX-19178 Changing the NFS export path makes the SMB share offline
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Revert "NEX-19178 Changing the ZFS mountpoint property of a dataset takes the SMB share offline"
This reverts commit 35bb44b3cdee0719ce685304ca801335d5cc234e.
NEX-19178 Changing the ZFS mountpoint property of a dataset takes the SMB share offline
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Matt Barden <matt.barden@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-16219 pool import performance regression due to repeated libshare initialization
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-15937 zpool import performance degradation in filesystem sharing
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15937 zpool import performance degradation in filesystem sharing
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-6586 cleanup gcc warnings in libzfs_mount.c
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
2605 want to resume interrupted zfs send
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Reviewed by: Arne Jansen <sensille@gmx.net>
Approved by: Dan McDonald <danmcd@omniti.com>
6280 libzfs: unshare_one() could fail with EZFS_SHARENFSFAILED
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Gordon Ross <gwr@nexenta.com>
NEX-1557 Parallel mount during HA Failover sometimes doesn't share the dataset, causes shares to go offline
SUP-647 Long failover times dominated by zpool import times trigger client-side errors
re #13594 rb4488 Lint complaints fix
re #10054 #13409 rb4387 added parallel unmount for zpool export

@@ -18,12 +18,16 @@
  *
  * CDDL HEADER END
  */
 
 /*
- * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Copyright 2019 Nexenta Systems, Inc.
  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  * Copyright 2017 Joyent, Inc.
  * Copyright 2017 RackTop Systems.
  */

@@ -59,11 +63,13 @@
  *
  * The following functions are available for pool consumers, and will
  * mount/unmount and share/unshare all datasets within pool:
  *
  *      zpool_enable_datasets()
+ *      zpool_enable_datasets_ex()
  *      zpool_disable_datasets()
+ *      zpool_disable_datasets_ex()
  */
 
 #include <dirent.h>
 #include <dlfcn.h>
 #include <errno.h>

@@ -76,10 +82,11 @@
 #include <unistd.h>
 #include <zone.h>
 #include <sys/mntent.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
+#include <thread_pool.h>
 #include <sys/statvfs.h>
 
 #include <libzfs.h>
 
 #include "libzfs_impl.h"

@@ -419,11 +426,12 @@
  * Unmount a single filesystem.
  */
 static int
 unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
 {
-        if (umount2(mountpoint, flags) != 0) {
+        int ret = umount2(mountpoint, flags);
+        if (ret != 0) {
                 zfs_error_aux(hdl, strerror(errno));
                 return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
                     dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
                     mountpoint));
         }

@@ -569,20 +577,20 @@
  * initialized in _zfs_init_libshare() are actually present.
  */
 
 static sa_handle_t (*_sa_init)(int);
 static sa_handle_t (*_sa_init_arg)(int, void *);
+static int (*_sa_service)(sa_handle_t);
 static void (*_sa_fini)(sa_handle_t);
 static sa_share_t (*_sa_find_share)(sa_handle_t, char *);
 static int (*_sa_enable_share)(sa_share_t, char *);
 static int (*_sa_disable_share)(sa_share_t, char *);
 static char *(*_sa_errorstr)(int);
 static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
 static boolean_t (*_sa_needs_refresh)(sa_handle_t *);
 static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t);
-static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t,
-    char *, char *, zprop_source_t, char *, char *, char *);
+static int (* _sa_get_zfs_share)(sa_handle_t, char *, zfs_handle_t *);
 static void (*_sa_update_sharetab_ts)(sa_handle_t);
 
 /*
  * _zfs_init_libshare()
  *

@@ -611,10 +619,12 @@
         if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) {
                 _sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init");
                 _sa_init_arg = (sa_handle_t (*)(int, void *))dlsym(libshare,
                     "sa_init_arg");
                 _sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini");
+                _sa_service = (int (*)(sa_handle_t))dlsym(libshare,
+                    "sa_service");
                 _sa_find_share = (sa_share_t (*)(sa_handle_t, char *))
                     dlsym(libshare, "sa_find_share");
                 _sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
                     "sa_enable_share");
                 _sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare,

@@ -624,33 +634,33 @@
                     dlsym(libshare, "sa_parse_legacy_options");
                 _sa_needs_refresh = (boolean_t (*)(sa_handle_t *))
                     dlsym(libshare, "sa_needs_refresh");
                 _sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t))
                     dlsym(libshare, "sa_get_zfs_handle");
-                _sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t,
-                    sa_share_t, char *, char *, zprop_source_t, char *,
-                    char *, char *))dlsym(libshare, "sa_zfs_process_share");
+                _sa_get_zfs_share = (int (*)(sa_handle_t, char *,
+                    zfs_handle_t *)) dlsym(libshare, "sa_get_zfs_share");
                 _sa_update_sharetab_ts = (void (*)(sa_handle_t))
                     dlsym(libshare, "sa_update_sharetab_ts");
                 if (_sa_init == NULL || _sa_init_arg == NULL ||
                     _sa_fini == NULL || _sa_find_share == NULL ||
                     _sa_enable_share == NULL || _sa_disable_share == NULL ||
                     _sa_errorstr == NULL || _sa_parse_legacy_options == NULL ||
                     _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL ||
-                    _sa_zfs_process_share == NULL ||
+                    _sa_get_zfs_share == NULL || _sa_service == NULL ||
                     _sa_update_sharetab_ts == NULL) {
                         _sa_init = NULL;
                         _sa_init_arg = NULL;
+                        _sa_service = NULL;
                         _sa_fini = NULL;
                         _sa_disable_share = NULL;
                         _sa_enable_share = NULL;
                         _sa_errorstr = NULL;
                         _sa_parse_legacy_options = NULL;
                         (void) dlclose(libshare);
                         _sa_needs_refresh = NULL;
                         _sa_get_zfs_handle = NULL;
-                        _sa_zfs_process_share = NULL;
+                        _sa_get_zfs_share = NULL;
                         _sa_update_sharetab_ts = NULL;
                 }
         }
 }
 

@@ -796,58 +806,58 @@
         char sourcestr[ZFS_MAXPROPLEN];
         libzfs_handle_t *hdl = zhp->zfs_hdl;
         sa_share_t share;
         zfs_share_proto_t *curr_proto;
         zprop_source_t sourcetype;
+        int service = SA_INIT_ONE_SHARE_FROM_HANDLE;
         int ret;
 
         if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
                 return (0);
 
+        /*
+         * Function may be called in a loop from higher up stack, with libshare
+         * initialized for multiple shares (SA_INIT_SHARE_API_SELECTIVE).
+         * zfs_init_libshare_arg will refresh the handle's cache if necessary.
+         * In this case we do not want to switch to per share initialization.
+         * Specify SA_INIT_SHARE_API to do full refresh, if refresh required.
+         */
+        if ((hdl->libzfs_sharehdl != NULL) && (_sa_service != NULL) &&
+            (_sa_service(hdl->libzfs_sharehdl) ==
+            SA_INIT_SHARE_API_SELECTIVE)) {
+                service = SA_INIT_SHARE_API;
+        }
+
         for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
                 /*
                  * Return success if there are no share options.
                  */
                 if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
                     shareopts, sizeof (shareopts), &sourcetype, sourcestr,
                     ZFS_MAXPROPLEN, B_FALSE) != 0 ||
                     strcmp(shareopts, "off") == 0)
                         continue;
-                ret = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_HANDLE,
-                    zhp);
+                ret = zfs_init_libshare_arg(hdl, service, zhp);
                 if (ret != SA_OK) {
                         (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
                             dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
                             zfs_get_name(zhp), _sa_errorstr != NULL ?
                             _sa_errorstr(ret) : "");
                         return (-1);
                 }
 
-                /*
-                 * If the 'zoned' property is set, then zfs_is_mountable()
-                 * will have already bailed out if we are in the global zone.
-                 * But local zones cannot be NFS servers, so we ignore it for
-                 * local zones as well.
-                 */
-                if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
-                        continue;
-
                 share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
                 if (share == NULL) {
                         /*
                          * This may be a new file system that was just
-                         * created so isn't in the internal cache
-                         * (second time through). Rather than
-                         * reloading the entire configuration, we can
-                         * assume ZFS has done the checking and it is
-                         * safe to add this to the internal
-                         * configuration.
+                         * created so isn't in the internal cache.
+                         * Rather than reloading the entire configuration,
+                         * we can add just this one share to the cache.
                          */
-                        if (_sa_zfs_process_share(hdl->libzfs_sharehdl,
-                            NULL, NULL, mountpoint,
-                            proto_table[*curr_proto].p_name, sourcetype,
-                            shareopts, sourcestr, zhp->zfs_name) != SA_OK) {
+                        if ((_sa_get_zfs_share == NULL) ||
+                            (_sa_get_zfs_share(hdl->libzfs_sharehdl, "zfs", zhp)
+                            != SA_OK)) {
                                 (void) zfs_error_fmt(hdl,
                                     proto_table[*curr_proto].p_share_err,
                                     dgettext(TEXT_DOMAIN, "cannot share '%s'"),
                                     zfs_get_name(zhp));
                                 return (-1);

@@ -905,26 +915,34 @@
     zfs_share_proto_t proto)
 {
         sa_share_t share;
         int err;
         char *mntpt;
+        int service = SA_INIT_ONE_SHARE_FROM_NAME;
 
         /*
          * Mountpoint could get trashed if libshare calls getmntany
          * which it does during API initialization, so strdup the
          * value.
          */
         mntpt = zfs_strdup(hdl, mountpoint);
 
         /*
-         * make sure libshare initialized, initialize everything because we
-         * don't know what other unsharing may happen later. Functions up the
-         * stack are allowed to initialize instead a subset of shares at the
-         * time the set is known.
+         * Function may be called in a loop from higher up stack, with libshare
+         * initialized for multiple shares (SA_INIT_SHARE_API_SELECTIVE).
+         * zfs_init_libshare_arg will refresh the handle's cache if necessary.
+         * In this case we do not want to switch to per share initialization.
+         * Specify SA_INIT_SHARE_API to do full refresh, if refresh required.
          */
-        if ((err = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_NAME,
-            (void *)name)) != SA_OK) {
+        if ((hdl->libzfs_sharehdl != NULL) && (_sa_service != NULL) &&
+            (_sa_service(hdl->libzfs_sharehdl) ==
+            SA_INIT_SHARE_API_SELECTIVE)) {
+                service = SA_INIT_SHARE_API;
+        }
+
+        err = zfs_init_libshare_arg(hdl, service, (void *)name);
+        if (err != SA_OK) {
                 free(mntpt);    /* don't need the copy anymore */
                 return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
                     dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
                     name, _sa_errorstr(err)));
         }

@@ -1153,27 +1171,491 @@
                 return (1);
 
         return (strcmp(zfs_get_name(a), zfs_get_name(b)));
 }
 
-/*
- * Mount and share all datasets within the given pool.  This assumes that no
- * datasets within the pool are currently mounted.  Because users can create
- * complicated nested hierarchies of mountpoints, we first gather all the
- * datasets and mountpoints within the pool, and sort them by mountpoint.  Once
- * we have the list of all filesystems, we iterate over them in order and mount
- * and/or share each one.
+/*
+ * Comparator with the qsort()-style signature for an array of mountpoint
+ * strings.  The operands are handed to strcmp() in swapped order, so the
+ * resulting order is descending.
+ */
+static int
+mountpoint_compare(const void *a, const void *b)
+{
+        const char *const *lhs = a;
+        const char *const *rhs = b;
+
+        return (strcmp(*rhs, *lhs));
+}
+
+typedef enum {         /* per-task state; advanced one step at a time */
+        TASK_TO_PROCESS,        /* set by the *_task_q_init() functions */
+        TASK_IN_PROCESSING,     /* claimed by a worker thread */
+        TASK_DONE,              /* worker finished with the task */
+        TASK_MAX                /* bound used by the state asserts */
+} task_state_t;
+
+typedef struct mount_task {    /* one entry of a mount_task_q_t */
+        const char      *mp;    /* mountpoint; set by umount_task_q_init() */
+        zfs_handle_t    *zh;    /* dataset; set by mount_task_q_init() */
+        task_state_t    state;  /* position in the task pipeline */
+        int             error;  /* errno saved by the worker on failure */
+} mount_task_t;
+
+typedef struct mount_task_q {  /* work queue shared by the worker threads */
+        pthread_mutex_t q_lock;         /* held while workers touch the queue */
+        libzfs_handle_t *hdl;           /* set by umount_task_q_init() only */
+        const char      *mntopts;       /* passed to zfs_mount() by mounter() */
+        const char      *error_mp;      /* mountpoint of first failed unmount */
+        zfs_handle_t    *error_zh;      /* dataset of first failed mount */
+        int             error;          /* first error recorded by a worker */
+        int             q_length;       /* number of entries in task[] */
+        int             n_tasks;        /* entries not yet TASK_DONE */
+        int             flags;          /* passed to umount2()/zfs_mount() */
+        mount_task_t    task[1];        /* really q_length entries long */
+} mount_task_q_t;
+
+/*
+ * Build the work queue consumed by the mounter() workers: one entry per
+ * dataset handle in 'handles', each starting out as TASK_TO_PROCESS.
+ *
+ * Returns 0 and hands the queue back through 'task' on success.  Returns
+ * EINVAL when argc is not positive, ENOMEM when the allocation fails, or the
+ * pthread_mutex_init() error; '*task' is NULL in each of those cases.
+ */
+static int
+mount_task_q_init(int argc, zfs_handle_t **handles, const char *mntopts,
+    int flags, mount_task_q_t **task)
+{
+        mount_task_q_t *q;
+        size_t nbytes;
+        int idx, rc;
+
+        *task = NULL;
+
+        /* An empty request should never get this far. */
+        if (argc <= 0)
+                return (EINVAL);
+
+        /* mount_task_q_t already embeds one entry; add room for the rest. */
+        nbytes = sizeof (mount_task_q_t) +
+            (argc - 1) * sizeof (mount_task_t);
+        if ((q = calloc(1, nbytes)) == NULL)
+                return (ENOMEM);
+
+        rc = pthread_mutex_init(&q->q_lock, NULL);
+        if (rc != 0) {
+                free(q);
+                return (rc);
+        }
+
+        q->mntopts = mntopts;
+        q->flags = flags;
+        q->n_tasks = argc;
+        q->q_length = argc;
+        q->error = 0;
+
+        /* Only the handle pointers are stored; nothing is duplicated. */
+        for (idx = 0; idx < argc; ++idx) {
+                q->task[idx].zh = handles[idx];
+                q->task[idx].state = TASK_TO_PROCESS;
+        }
+
+        *task = q;
+        return (0);
+}
+
+/*
+ * Build the work queue consumed by the unmounter() workers: one entry per
+ * mountpoint string in 'argv', each starting out as TASK_TO_PROCESS.
+ *
+ * Returns 0 and hands the queue back through 'task' on success.  Returns
+ * EINVAL when argc is not positive, ENOMEM when the allocation fails, or the
+ * pthread_mutex_init() error; '*task' is NULL in each of those cases.
+ */
+static int
+umount_task_q_init(int argc, const char **argv, int flags,
+    libzfs_handle_t *hdl, mount_task_q_t **task)
+{
+        mount_task_q_t *q;
+        size_t nbytes;
+        int idx, rc;
+
+        *task = NULL;
+
+        /* An empty request should never get this far. */
+        if (argc <= 0)
+                return (EINVAL);
+
+        /* mount_task_q_t already embeds one entry; add room for the rest. */
+        nbytes = sizeof (mount_task_q_t) +
+            (argc - 1) * sizeof (mount_task_t);
+        if ((q = calloc(1, nbytes)) == NULL)
+                return (ENOMEM);
+
+        rc = pthread_mutex_init(&q->q_lock, NULL);
+        if (rc != 0) {
+                free(q);
+                return (rc);
+        }
+
+        q->hdl = hdl;
+        q->flags = flags;
+        q->n_tasks = argc;
+        q->q_length = argc;
+        q->error = 0;
+
+        /* The strings are never modified, so the pointers are borrowed. */
+        for (idx = 0; idx < argc; ++idx) {
+                q->task[idx].mp = argv[idx];
+                q->task[idx].state = TASK_TO_PROCESS;
+        }
+
+        *task = q;
+        return (0);
+}
+
+/*
+ * Tear down a queue built by mount_task_q_init() or umount_task_q_init():
+ * destroy its mutex and release the allocation.
+ */
+static void
+mount_task_q_fini(mount_task_q_t *task_q)
+{
+        pthread_mutex_t *lock;
+
+        assert(task_q != NULL);
+
+        lock = &task_q->q_lock;
+        (void) pthread_mutex_destroy(lock);
+        free(task_q);
+}
+
+/*
+ * Return 1 when s1 consists of all of s2 immediately followed by a '/',
+ * i.e. s2 is a proper path prefix of s1; return 0 otherwise.
+ */
+static int
+is_child_of(const char *s1, const char *s2)
+{
+        size_t prefix_len = strlen(s2);
+
+        if (strncmp(s1, s2, prefix_len) != 0)
+                return (0);
+
+        return (s1[prefix_len] == '/');
+}
+
+/* True when entry 'ind' of the queue has reached TASK_DONE. */
+static boolean_t
+task_completed(int ind, mount_task_q_t *task_q)
+{
+        const mount_task_t *entry = &task_q->task[ind];
+
+        return (entry->state == TASK_DONE);
+}
+
+/* True when entry 'ind' of the queue is still in TASK_TO_PROCESS. */
+static boolean_t
+task_to_process(int ind, mount_task_q_t *task_q)
+{
+        const mount_task_t *entry = &task_q->task[ind];
+
+        return (entry->state == TASK_TO_PROCESS);
+}
+
+/* True when entry 'ind' of the queue is in TASK_IN_PROCESSING. */
+static boolean_t
+task_in_processing(int ind, mount_task_q_t *task_q)
+{
+        const mount_task_t *entry = &task_q->task[ind];
+
+        return (entry->state == TASK_IN_PROCESSING);
+}
+
+/*
+ * Move entry 'ind' to the next state.  States are only ever advanced one
+ * step at a time, and stepping onto TASK_MAX trips the assert.
+ */
+static void
+task_next_stage(int ind, mount_task_q_t *task_q)
+{
+        mount_task_t *entry = &task_q->task[ind];
+
+        entry->state = (task_state_t)(entry->state + 1);
+        assert(entry->state < TASK_MAX);
+}
+
+/* True while the state of entry 'ind' is below the TASK_MAX bound. */
+static boolean_t
+task_state_valid(int ind, mount_task_q_t *task_q)
+{
+        const mount_task_t *entry = &task_q->task[ind];
+
+        return (entry->state < TASK_MAX);
+}
+
+/*
+ * Report whether any entry ahead of 'ind' in the queue is not yet TASK_DONE
+ * and has a mountpoint located underneath the mountpoint of entry 'ind'.
+ */
+static boolean_t
+child_umount_pending(int ind, mount_task_q_t *task_q)
+{
+        int prev = ind;
+
+        while (--prev >= 0) {
+                const mount_task_t *other = &task_q->task[prev];
+
+                assert(task_state_valid(prev, task_q));
+                if (other->state == TASK_DONE)
+                        continue;
+                if (is_child_of(other->mp, task_q->task[ind].mp))
+                        return (B_TRUE);
+        }
+
+        return (B_FALSE);
+}
+
+/*
+ * Report whether any entry ahead of 'ind' in the queue is not yet TASK_DONE
+ * and names a dataset that the dataset of entry 'ind' is located underneath.
+ */
+static boolean_t
+parent_mount_pending(int ind, mount_task_q_t *task_q)
+{
+        int prev = ind;
+
+        while (--prev >= 0) {
+                const mount_task_t *other = &task_q->task[prev];
+
+                assert(task_state_valid(prev, task_q));
+                if (other->state == TASK_DONE)
+                        continue;
+                if (is_child_of(task_q->task[ind].zh->zfs_name,
+                    other->zh->zfs_name))
+                        return (B_TRUE);
+        }
+
+        return (B_FALSE);
+}
+
+/*
+ * Worker dispatched by parallel_unmount(); 'arg' is the shared
+ * mount_task_q_t.  Each pass claims, under q_lock, the first
+ * TASK_TO_PROCESS entry for which child_umount_pending() is false, calls
+ * umount2() on it with the lock dropped, then re-takes the lock to mark the
+ * entry TASK_DONE and record the outcome.  The worker returns when the queue
+ * is drained, when no entry can be claimed, when a failure has been
+ * recorded, or when a mutex operation fails.
+ */
+static void
+unmounter(void *arg)
+{
+        mount_task_q_t *task_q = (mount_task_q_t *)arg;
+        int error = 0, done = 0;
+
+        assert(task_q != NULL);
+        if (task_q == NULL)
+                return;
+
+        while (!error && !done) {
+                mount_task_t *task;
+                int i, t, umount_err, flags, q_error;
+
+                if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
+                        break; /* Out of while() loop */
+
+                /* Stop once a failure was recorded or nothing is left */
+                if (task_q->error || task_q->n_tasks == 0) {
+                        (void) pthread_mutex_unlock(&task_q->q_lock);
+                        break; /* Out of while() loop */
+                }
+
+                /* Find task ready for processing */
+                for (i = 0, task = NULL, t = -1; i < task_q->q_length; ++i) {
+                        if (task_q->error) {
+                                /* Fatal error, stop processing */
+                                done = 1;
+                                break; /* Out of for() loop */
+                        }
+
+                        if (task_completed(i, task_q))
+                                continue; /* for() loop */
+
+                        if (task_to_process(i, task_q)) {
+                                /*
+                                 * Cannot umount if some children are still
+                                 * mounted; come back later
  */
-#pragma weak zpool_mount_datasets = zpool_enable_datasets
+                                if ((child_umount_pending(i, task_q)))
+                                        continue; /* for() loop */
+                                /* Should be OK to unmount now */
+                                task_next_stage(i, task_q);
+                                task = &task_q->task[i];
+                                t = i;
+                                break; /* Out of for() loop */
+                        }
+
+                        /* Otherwise, the task is already in processing */
+                        assert(task_in_processing(i, task_q));
+                }
+
+                flags = task_q->flags;
+
+                error = pthread_mutex_unlock(&task_q->q_lock);
+
+                /*
+                 * task == NULL means nothing could be claimed on this pass,
+                 * which ends this worker.
+                 * NOTE(review): task_q->error is read here after q_lock was
+                 * released.
+                 */
+                if (done || (task == NULL) || error || task_q->error)
+                        break; /* Out of while() loop */
+
+                /* The unmount itself runs without the queue lock held */
+                umount_err = umount2(task->mp, flags);
+                q_error = errno;
+
+                if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
+                        break; /* Out of while() loop */
+
+                /* done processing */
+                assert(t >= 0 && t < task_q->q_length);
+                task_next_stage(t, task_q);
+                assert(task_completed(t, task_q));
+                task_q->n_tasks--;
+
+                if (umount_err) {
+                        /*
+                         * umount2() failed, cannot be busy because of mounted
+                         * children - we have checked above, so it is fatal
+                         */
+                        assert(child_umount_pending(t, task_q) == B_FALSE);
+                        task->error = q_error;
+                        /* Only the first failure is kept at queue level */
+                        if (!task_q->error) {
+                                task_q->error = task->error;
+                                task_q->error_mp = task->mp;
+                        }
+                        done = 1;
+                }
+
+                if ((error = pthread_mutex_unlock(&task_q->q_lock)) != 0)
+                        break; /* Out of while() loop */
+        }
+}
+
+/*
+ * Worker dispatched by parallel_mount(); 'arg' is the shared
+ * mount_task_q_t.  Each pass claims, under q_lock, the first
+ * TASK_TO_PROCESS entry for which parent_mount_pending() is false, calls
+ * zfs_mount() on it with the lock dropped, then re-takes the lock to mark
+ * the entry TASK_DONE and record the outcome.  The worker returns when the
+ * queue is drained, when no entry can be claimed, when a failure has been
+ * recorded, or when a mutex operation fails.
+ */
+static void
+mounter(void *arg)
+{
+        mount_task_q_t *task_q = (mount_task_q_t *)arg;
+        int error = 0, done = 0;
+
+        assert(task_q != NULL);
+        if (task_q == NULL)
+                return;
+
+        while (!error && !done) {
+                mount_task_t *task;
+                int i, t, mount_err, flags, q_error;
+                const char *mntopts;
+
+                if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
+                        break; /* Out of while() loop */
+
+                /* Stop once a failure was recorded or nothing is left */
+                if (task_q->error || task_q->n_tasks == 0) {
+                        (void) pthread_mutex_unlock(&task_q->q_lock);
+                        break; /* Out of while() loop */
+                }
+
+                /* Find task ready for processing */
+                for (i = 0, task = NULL, t = -1; i < task_q->q_length; ++i) {
+                        if (task_q->error) {
+                                /* Fatal error, stop processing */
+                                done = 1;
+                                break; /* Out of for() loop */
+                        }
+
+                        if (task_completed(i, task_q))
+                                continue; /* for() loop */
+
+                        if (task_to_process(i, task_q)) {
+                                /*
+                                 * Cannot mount if some parents are not
+                                 * mounted yet; come back later
+                                 */
+                                if ((parent_mount_pending(i, task_q)))
+                                        continue; /* for() loop */
+                                /* Should be OK to mount now */
+                                task_next_stage(i, task_q);
+                                task = &task_q->task[i];
+                                t = i;
+                                break; /* Out of for() loop */
+                        }
+
+                        /* Otherwise, the task is already in processing */
+                        assert(task_in_processing(i, task_q));
+                }
+
+                flags = task_q->flags;
+                mntopts = task_q->mntopts;
+
+                error = pthread_mutex_unlock(&task_q->q_lock);
+
+                /*
+                 * task == NULL means nothing could be claimed on this pass,
+                 * which ends this worker.
+                 * NOTE(review): task_q->error is read here after q_lock was
+                 * released.
+                 */
+                if (done || (task == NULL) || error || task_q->error)
+                        break; /* Out of while() loop */
+
+                /* The mount itself runs without the queue lock held */
+                mount_err = zfs_mount(task->zh, mntopts, flags);
+                q_error = errno;
+
+                if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
+                        break; /* Out of while() loop */
+
+                /* done processing */
+                assert(t >= 0 && t < task_q->q_length);
+                task_next_stage(t, task_q);
+                assert(task_completed(t, task_q));
+                task_q->n_tasks--;
+
+                if (mount_err) {
+                        /*
+                         * errno is not guaranteed to be non-zero after a
+                         * failed zfs_mount().  Recording 0 would leave both
+                         * task->error and task_q->error clear, so the failure
+                         * would be indistinguishable from success; fall back
+                         * to EIO in that case.
+                         */
+                        task->error = (q_error != 0) ? q_error : EIO;
+                        /* Only the first failure is kept at queue level */
+                        if (!task_q->error) {
+                                task_q->error = task->error;
+                                task_q->error_zh = task->zh;
+                        }
+                        done = 1;
+                }
+
+                if ((error = pthread_mutex_unlock(&task_q->q_lock)) != 0)
+                        break; /* Out of while() loop */
+        }
+}
+
+#define THREADS_HARD_LIMIT      128
+/*
+ * Unmount the 'argc' mountpoints listed in 'argv' using a pool of
+ * unmounter() workers.  The worker count is 'n_threads' clamped to the
+ * range [1, min(argc, THREADS_HARD_LIMIT)]; 'flags' is handed to umount2().
+ *
+ * Returns 0 when argc is 0 or every mountpoint was unmounted.  Returns the
+ * umount_task_q_init() error code, or the errno left by a failed
+ * tpool_create(), when the work could not be started.  Otherwise an
+ * EZFS_UMOUNTFAILED error is recorded on 'hdl' and the zfs_error_fmt()
+ * result is returned.
+ */
+int parallel_unmount(libzfs_handle_t *hdl, int argc, const char **argv,
+    int flags, int n_threads)
+{
+        mount_task_q_t *task_queue = NULL;
+        int             i, error;
+        tpool_t         *t;
+
+        if (argc == 0)
+                return (0);
+
+        if ((error = umount_task_q_init(argc, argv, flags, hdl, &task_queue))
+            != 0) {
+                assert(task_queue == NULL);
+                return (error);
+        }
+
+        if (n_threads > argc)
+                n_threads = argc;
+
+        if (n_threads > THREADS_HARD_LIMIT)
+                n_threads = THREADS_HARD_LIMIT;
+
+        /* tpool_create() rejects a maximum below one thread */
+        if (n_threads < 1)
+                n_threads = 1;
+
+        if ((t = tpool_create(1, n_threads, 0, NULL)) == NULL) {
+                error = (errno != 0) ? errno : ENOMEM;
+                mount_task_q_fini(task_queue);
+                return (error);
+        }
+
+        /*
+         * A failed dispatch only reduces the number of workers; work left
+         * undone because of it is detected through n_tasks below.
+         */
+        for (i = 0; i < n_threads; ++i)
+                (void) tpool_dispatch(t, unmounter, task_queue);
+
+        tpool_wait(t);
+        tpool_destroy(t);
+
+        if (task_queue->error) {
+                /*
+                 * Tell ZFS!
+                 */
+                zfs_error_aux(hdl, strerror(task_queue->error));
+                error = zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
+                    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
+                    task_queue->error_mp);
+        } else if (task_queue->n_tasks != 0) {
+                /*
+                 * No unmount failed, yet entries were never completed: the
+                 * workers did not run or gave up on a mutex error.
+                 */
+                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                    "worker threads exited before all mountpoints "
+                    "were processed"));
+                error = zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
+                    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
+                    "datasets");
+        }
+
+        mount_task_q_fini(task_queue);
+
+        return (error);
+}
+
+/*
+ * Mount the cb->cb_used datasets in cb->cb_handles using a pool of
+ * mounter() workers.  The worker count is 'n_threads' clamped to the range
+ * [1, min(cb->cb_used, THREADS_HARD_LIMIT)]; 'mntopts' and 'flags' are
+ * handed to zfs_mount().
+ *
+ * On return from the worker pool, good[i] is non-zero only for datasets
+ * whose task ran to completion without a recorded error; datasets that were
+ * never attempted are reported as not good.
+ *
+ * Returns 0 when there is nothing to do or every dataset was mounted.
+ * Returns the mount_task_q_init() error code, or the errno left by a failed
+ * tpool_create(), when the work could not be started ('good' is left
+ * untouched then).  Otherwise an EZFS_MOUNTFAILED error is recorded and the
+ * zfs_error_fmt() result is returned.
+ */
+int parallel_mount(get_all_cb_t *cb, int *good, const char *mntopts,
+    int flags, int n_threads)
+{
+        int             i, error = 0;
+        mount_task_q_t  *task_queue = NULL;
+        tpool_t         *t;
+
+        if (cb->cb_used == 0)
+                return (0);
+
+        if (n_threads > cb->cb_used)
+                n_threads = cb->cb_used;
+
+        if (n_threads > THREADS_HARD_LIMIT)
+                n_threads = THREADS_HARD_LIMIT;
+
+        /* tpool_create() rejects a maximum below one thread */
+        if (n_threads < 1)
+                n_threads = 1;
+
+        if ((error = mount_task_q_init(cb->cb_used, cb->cb_handles,
+            mntopts, flags, &task_queue)) != 0) {
+                assert(task_queue == NULL);
+                return (error);
+        }
+
+        if ((t = tpool_create(1, n_threads, 0, NULL)) == NULL) {
+                error = (errno != 0) ? errno : ENOMEM;
+                mount_task_q_fini(task_queue);
+                return (error);
+        }
+
+        /*
+         * A failed dispatch only reduces the number of workers; work left
+         * undone because of it is detected through n_tasks below.
+         */
+        for (i = 0; i < n_threads; ++i)
+                (void) tpool_dispatch(t, mounter, task_queue);
+
+        tpool_wait(t);
+        tpool_destroy(t);
+
+        for (i = 0; i < cb->cb_used; ++i) {
+                mount_task_t *task = &task_queue->task[i];
+
+                /*
+                 * Workers stop claiming tasks after the first failure, so a
+                 * clear error field alone does not mean the dataset was
+                 * mounted; the task must also have reached TASK_DONE.
+                 */
+                good[i] = (task->state == TASK_DONE && task->error == 0);
+                if (task->error != 0) {
+                        zfs_error_aux(task->zh->zfs_hdl,
+                            strerror(task->error));
+                        (void) zfs_error_fmt(task->zh->zfs_hdl,
+                            EZFS_MOUNTFAILED,
+                            dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+                            task->zh->zfs_name);
+                }
+        }
+
+        if (task_queue->error) {
+                zfs_handle_t *failed = task_queue->error_zh;
+                /*
+                 * Tell ZFS!
+                 */
+                zfs_error_aux(failed->zfs_hdl, strerror(task_queue->error));
+                error = zfs_error_fmt(failed->zfs_hdl, EZFS_MOUNTFAILED,
+                    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+                    failed->zfs_name);
+        } else if (task_queue->n_tasks != 0) {
+                libzfs_handle_t *lhdl = cb->cb_handles[0]->zfs_hdl;
+                /*
+                 * No failure was recorded, yet entries were never completed:
+                 * the workers did not run or stopped early.
+                 */
+                zfs_error_aux(lhdl, dgettext(TEXT_DOMAIN,
+                    "worker threads exited before all datasets "
+                    "were processed"));
+                error = zfs_error_fmt(lhdl, EZFS_MOUNTFAILED,
+                    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+                    "datasets");
+        }
+
+        mount_task_q_fini(task_queue);
+
+        return (error);
+}
+
 int
-zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+zpool_enable_datasets_ex(zpool_handle_t *zhp, const char *mntopts, int flags,
+    int n_threads)
 {
         get_all_cb_t cb = { 0 };
         libzfs_handle_t *hdl = zhp->zpool_hdl;
         zfs_handle_t *zfsp;
         int i, ret = -1;
         int *good;
+        sa_init_selective_arg_t sharearg;
 
         /*
          * Gather all non-snap datasets within the pool.
          */
         if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL)

@@ -1195,18 +1677,36 @@
         if ((good = zfs_alloc(zhp->zpool_hdl,
             cb.cb_used * sizeof (int))) == NULL)
                 goto out;
 
         ret = 0;
+        if (n_threads < 2) {
         for (i = 0; i < cb.cb_used; i++) {
                 if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0)
                         ret = -1;
                 else
                         good[i] = 1;
         }
+        } else {
+                ret = parallel_mount(&cb, good, mntopts, flags, n_threads);
+        }
 
         /*
+         * Initialize libshare with SA_INIT_SHARE_API_SELECTIVE here
+         * to avoid an unnecessary load/unload of the libshare API
+         * for each shared dataset downstream.
+         */
+        sharearg.zhandle_arr = cb.cb_handles;
+        sharearg.zhandle_len = cb.cb_used;
+        ret = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE,
+            &sharearg);
+        if (ret != 0) {
+                free(good);
+                goto out;
+        }
+
+        /*
          * Then share all the ones that need to be shared. This needs
          * to be a separate pass in order to avoid excessive reloading
          * of the configuration. Good should never be NULL since
          * zfs_alloc is supposed to exit if memory isn't available.
          */

@@ -1223,30 +1723,12 @@
         free(cb.cb_handles);
 
         return (ret);
 }
 
-static int
-mountpoint_compare(const void *a, const void *b)
-{
-        const char *mounta = *((char **)a);
-        const char *mountb = *((char **)b);
-
-        return (strcmp(mountb, mounta));
-}
-
-/* alias for 2002/240 */
-#pragma weak zpool_unmount_datasets = zpool_disable_datasets
-/*
- * Unshare and unmount all datasets within the given pool.  We don't want to
- * rely on traversing the DSL to discover the filesystems within the pool,
- * because this may be expensive (if not all of them are mounted), and can fail
- * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
- * gather all the filesystems that are currently mounted.
- */
 int
-zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
+zpool_disable_datasets_ex(zpool_handle_t *zhp, boolean_t force, int n_threads)
 {
         int used, alloc;
         struct mnttab entry;
         size_t namelen;
         char **mountpoints = NULL;

@@ -1344,30 +1826,34 @@
         for (i = 0; i < used; i++) {
                 zfs_share_proto_t *curr_proto;
                 for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
                     curr_proto++) {
                         if (is_shared(hdl, mountpoints[i], *curr_proto) &&
-                            unshare_one(hdl, mountpoints[i],
-                            mountpoints[i], *curr_proto) != 0)
+                            unshare_one(hdl, mountpoints[i], mountpoints[i],
+                            *curr_proto) != 0)
                                 goto out;
                 }
         }
 
         /*
          * Now unmount everything, removing the underlying directories as
          * appropriate.
          */
+        if (n_threads < 2) {
         for (i = 0; i < used; i++) {
                 if (unmount_one(hdl, mountpoints[i], flags) != 0)
                         goto out;
         }
-
+        } else {
+                if (parallel_unmount(hdl, used, (const char **)mountpoints,
+                    flags, n_threads) != 0)
+                        goto out;
+        }
         for (i = 0; i < used; i++) {
                 if (datasets[i])
                         remove_mountpoint(datasets[i]);
         }
-
         ret = 0;
 out:
         for (i = 0; i < used; i++) {
                 if (datasets[i])
                         zfs_close(datasets[i]);

@@ -1375,6 +1861,36 @@
         }
         free(datasets);
         free(mountpoints);
 
         return (ret);
+}
+
+/*
+ * Mount and share all datasets within the given pool.  This assumes that no
+ * datasets within the pool are currently mounted.  Because users can create
+ * complicated nested hierarchies of mountpoints, we first gather all the
+ * datasets and mountpoints within the pool, and sort them by mountpoint.  Once
+ * we have the list of all filesystems, we iterate over them in order and mount
+ * and/or share each one.
+ */
+#pragma weak zpool_mount_datasets = zpool_enable_datasets
+int
+zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+{
+        /* Fewer than two threads selects the serial mount loop. */
+        const int n_threads = 1;
+
+        return (zpool_enable_datasets_ex(zhp, mntopts, flags, n_threads));
+}
+
+/* alias for 2002/240 */
+#pragma weak zpool_unmount_datasets = zpool_disable_datasets
+/*
+ * Unshare and unmount all datasets within the given pool.  We don't want to
+ * rely on traversing the DSL to discover the filesystems within the pool,
+ * because this may be expensive (if not all of them are mounted), and can fail
+ * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
+ * gather all the filesystems that are currently mounted.
+ */
+int
+zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
+{
+        /* Fewer than two threads selects the serial unmount loop. */
+        const int n_threads = 1;
+
+        return (zpool_disable_datasets_ex(zhp, force, n_threads));
 }