1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  25  */
  26 
  27 /*
  28  * Copyright 2019 Nexenta Systems, Inc.
  29  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
  30  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  31  * Copyright 2017 Joyent, Inc.
  32  * Copyright 2017 RackTop Systems.
  33  */
  34 
  35 /*
  36  * Routines to manage ZFS mounts.  We separate all the nasty routines that have
  37  * to deal with the OS.  The following functions are the main entry points --
  38  * they are used by mount and unmount and when changing a filesystem's
  39  * mountpoint.
  40  *
  41  *      zfs_is_mounted()
  42  *      zfs_mount()
  43  *      zfs_unmount()
  44  *      zfs_unmountall()
  45  *
  46  * This file also contains the functions used to manage sharing filesystems via
  47  * NFS and SMB:
  48  *
  49  *      zfs_is_shared()
  50  *      zfs_share()
  51  *      zfs_unshare()
  52  *
  53  *      zfs_is_shared_nfs()
  54  *      zfs_is_shared_smb()
  55  *      zfs_share_proto()
  56  *      zfs_shareall()
  57  *      zfs_unshare_nfs()
  58  *      zfs_unshare_smb()
  59  *      zfs_unshareall_nfs()
  60  *      zfs_unshareall_smb()
  61  *      zfs_unshareall()
  62  *      zfs_unshareall_bypath()
  63  *
  64  * The following functions are available for pool consumers, and will
  65  * mount/unmount and share/unshare all datasets within pool:
  66  *
  67  *      zpool_enable_datasets()
  68  *      zpool_enable_datasets_ex()
  69  *      zpool_disable_datasets()
  70  *      zpool_disable_datasets_ex()
  71  */
  72 
  73 #include <dirent.h>
  74 #include <dlfcn.h>
  75 #include <errno.h>
  76 #include <fcntl.h>
  77 #include <libgen.h>
  78 #include <libintl.h>
  79 #include <stdio.h>
  80 #include <stdlib.h>
  81 #include <strings.h>
  82 #include <unistd.h>
  83 #include <zone.h>
  84 #include <sys/mntent.h>
  85 #include <sys/mount.h>
  86 #include <sys/stat.h>
  87 #include <thread_pool.h>
  88 #include <sys/statvfs.h>
  89 
  90 #include <libzfs.h>
  91 
  92 #include "libzfs_impl.h"
  93 
  94 #include <libshare.h>
  95 #include <sys/systeminfo.h>
  96 #define MAXISALEN       257     /* based on sysinfo(2) man page */
  97 
  98 static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
  99 zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
 100     zfs_share_proto_t);
 101 
 102 /*
 103  * The share protocols table must be in the same order as the zfs_share_proto_t
 104  * enum in libzfs_impl.h
 105  */
 106 typedef struct {
 107         zfs_prop_t p_prop;
 108         char *p_name;
 109         int p_share_err;
 110         int p_unshare_err;
 111 } proto_table_t;
 112 
 113 proto_table_t proto_table[PROTO_END] = {
 114         {ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
 115         {ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
 116 };
 117 
 118 zfs_share_proto_t nfs_only[] = {
 119         PROTO_NFS,
 120         PROTO_END
 121 };
 122 
 123 zfs_share_proto_t smb_only[] = {
 124         PROTO_SMB,
 125         PROTO_END
 126 };
 127 zfs_share_proto_t share_all_proto[] = {
 128         PROTO_NFS,
 129         PROTO_SMB,
 130         PROTO_END
 131 };
 132 
 133 /*
 134  * Search the sharetab for the given mountpoint and protocol, returning
 135  * a zfs_share_type_t value.
 136  */
 137 static zfs_share_type_t
 138 is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
 139 {
 140         char buf[MAXPATHLEN], *tab;
 141         char *ptr;
 142 
 143         if (hdl->libzfs_sharetab == NULL)
 144                 return (SHARED_NOT_SHARED);
 145 
 146         (void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
 147 
 148         while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
 149 
 150                 /* the mountpoint is the first entry on each line */
 151                 if ((tab = strchr(buf, '\t')) == NULL)
 152                         continue;
 153 
 154                 *tab = '\0';
 155                 if (strcmp(buf, mountpoint) == 0) {
 156                         /*
 157                          * the protocol field is the third field
 158                          * skip over second field
 159                          */
 160                         ptr = ++tab;
 161                         if ((tab = strchr(ptr, '\t')) == NULL)
 162                                 continue;
 163                         ptr = ++tab;
 164                         if ((tab = strchr(ptr, '\t')) == NULL)
 165                                 continue;
 166                         *tab = '\0';
 167                         if (strcmp(ptr,
 168                             proto_table[proto].p_name) == 0) {
 169                                 switch (proto) {
 170                                 case PROTO_NFS:
 171                                         return (SHARED_NFS);
 172                                 case PROTO_SMB:
 173                                         return (SHARED_SMB);
 174                                 default:
 175                                         return (0);
 176                                 }
 177                         }
 178                 }
 179         }
 180 
 181         return (SHARED_NOT_SHARED);
 182 }
 183 
 184 static boolean_t
 185 dir_is_empty_stat(const char *dirname)
 186 {
 187         struct stat st;
 188 
 189         /*
 190          * We only want to return false if the given path is a non empty
 191          * directory, all other errors are handled elsewhere.
 192          */
 193         if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {
 194                 return (B_TRUE);
 195         }
 196 
 197         /*
 198          * An empty directory will still have two entries in it, one
 199          * entry for each of "." and "..".
 200          */
 201         if (st.st_size > 2) {
 202                 return (B_FALSE);
 203         }
 204 
 205         return (B_TRUE);
 206 }
 207 
 208 static boolean_t
 209 dir_is_empty_readdir(const char *dirname)
 210 {
 211         DIR *dirp;
 212         struct dirent64 *dp;
 213         int dirfd;
 214 
 215         if ((dirfd = openat(AT_FDCWD, dirname,
 216             O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {
 217                 return (B_TRUE);
 218         }
 219 
 220         if ((dirp = fdopendir(dirfd)) == NULL) {
 221                 (void) close(dirfd);
 222                 return (B_TRUE);
 223         }
 224 
 225         while ((dp = readdir64(dirp)) != NULL) {
 226 
 227                 if (strcmp(dp->d_name, ".") == 0 ||
 228                     strcmp(dp->d_name, "..") == 0)
 229                         continue;
 230 
 231                 (void) closedir(dirp);
 232                 return (B_FALSE);
 233         }
 234 
 235         (void) closedir(dirp);
 236         return (B_TRUE);
 237 }
 238 
 239 /*
 240  * Returns true if the specified directory is empty.  If we can't open the
 241  * directory at all, return true so that the mount can fail with a more
 242  * informative error message.
 243  */
 244 static boolean_t
 245 dir_is_empty(const char *dirname)
 246 {
 247         struct statvfs64 st;
 248 
 249         /*
 250          * If the statvfs call fails or the filesystem is not a ZFS
 251          * filesystem, fall back to the slow path which uses readdir.
 252          */
 253         if ((statvfs64(dirname, &st) != 0) ||
 254             (strcmp(st.f_basetype, "zfs") != 0)) {
 255                 return (dir_is_empty_readdir(dirname));
 256         }
 257 
 258         /*
 259          * At this point, we know the provided path is on a ZFS
 260          * filesystem, so we can use stat instead of readdir to
 261          * determine if the directory is empty or not. We try to avoid
 262          * using readdir because that requires opening "dirname"; this
 263          * open file descriptor can potentially end up in a child
 264          * process if there's a concurrent fork, thus preventing the
 265          * zfs_mount() from otherwise succeeding (the open file
 266          * descriptor inherited by the child process will cause the
 267          * parent's mount to fail with EBUSY). The performance
 268          * implications of replacing the open, read, and close with a
 269          * single stat is nice; but is not the main motivation for the
 270          * added complexity.
 271          */
 272         return (dir_is_empty_stat(dirname));
 273 }
 274 
 275 /*
 276  * Checks to see if the mount is active.  If the filesystem is mounted, we fill
 277  * in 'where' with the current mountpoint, and return 1.  Otherwise, we return
 278  * 0.
 279  */
 280 boolean_t
 281 is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
 282 {
 283         struct mnttab entry;
 284 
 285         if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)
 286                 return (B_FALSE);
 287 
 288         if (where != NULL)
 289                 *where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
 290 
 291         return (B_TRUE);
 292 }
 293 
 294 boolean_t
 295 zfs_is_mounted(zfs_handle_t *zhp, char **where)
 296 {
 297         return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
 298 }
 299 
 300 /*
 301  * Returns true if the given dataset is mountable, false otherwise.  Returns the
 302  * mountpoint in 'buf'.
 303  */
 304 static boolean_t
 305 zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
 306     zprop_source_t *source)
 307 {
 308         char sourceloc[MAXNAMELEN];
 309         zprop_source_t sourcetype;
 310 
 311         if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type))
 312                 return (B_FALSE);
 313 
 314         verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
 315             &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
 316 
 317         if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
 318             strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
 319                 return (B_FALSE);
 320 
 321         if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
 322                 return (B_FALSE);
 323 
 324         if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
 325             getzoneid() == GLOBAL_ZONEID)
 326                 return (B_FALSE);
 327 
 328         if (source)
 329                 *source = sourcetype;
 330 
 331         return (B_TRUE);
 332 }
 333 
 334 /*
 335  * Mount the given filesystem.
 336  */
 337 int
 338 zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 339 {
 340         struct stat buf;
 341         char mountpoint[ZFS_MAXPROPLEN];
 342         char mntopts[MNT_LINE_MAX];
 343         libzfs_handle_t *hdl = zhp->zfs_hdl;
 344 
 345         if (options == NULL)
 346                 mntopts[0] = '\0';
 347         else
 348                 (void) strlcpy(mntopts, options, sizeof (mntopts));
 349 
 350         /*
 351          * If the pool is imported read-only then all mounts must be read-only
 352          */
 353         if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
 354                 flags |= MS_RDONLY;
 355 
 356         if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
 357                 return (0);
 358 
 359         /* Create the directory if it doesn't already exist */
 360         if (lstat(mountpoint, &buf) != 0) {
 361                 if (mkdirp(mountpoint, 0755) != 0) {
 362                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 363                             "failed to create mountpoint"));
 364                         return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 365                             dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
 366                             mountpoint));
 367                 }
 368         }
 369 
 370         /*
 371          * Determine if the mountpoint is empty.  If so, refuse to perform the
 372          * mount.  We don't perform this check if MS_OVERLAY is specified, which
 373          * would defeat the point.  We also avoid this check if 'remount' is
 374          * specified.
 375          */
 376         if ((flags & MS_OVERLAY) == 0 &&
 377             strstr(mntopts, MNTOPT_REMOUNT) == NULL &&
 378             !dir_is_empty(mountpoint)) {
 379                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 380                     "directory is not empty"));
 381                 return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 382                     dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
 383         }
 384 
 385         /* perform the mount */
 386         if (mount(zfs_get_name(zhp), mountpoint, MS_OPTIONSTR | flags,
 387             MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) {
 388                 /*
 389                  * Generic errors are nasty, but there are just way too many
 390                  * from mount(), and they're well-understood.  We pick a few
 391                  * common ones to improve upon.
 392                  */
 393                 if (errno == EBUSY) {
 394                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 395                             "mountpoint or dataset is busy"));
 396                 } else if (errno == EPERM) {
 397                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 398                             "Insufficient privileges"));
 399                 } else if (errno == ENOTSUP) {
 400                         char buf[256];
 401                         int spa_version;
 402 
 403                         VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
 404                         (void) snprintf(buf, sizeof (buf),
 405                             dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
 406                             "file system on a version %d pool. Pool must be"
 407                             " upgraded to mount this file system."),
 408                             (u_longlong_t)zfs_prop_get_int(zhp,
 409                             ZFS_PROP_VERSION), spa_version);
 410                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
 411                 } else {
 412                         zfs_error_aux(hdl, strerror(errno));
 413                 }
 414                 return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 415                     dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
 416                     zhp->zfs_name));
 417         }
 418 
 419         /* add the mounted entry into our cache */
 420         libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint,
 421             mntopts);
 422         return (0);
 423 }
 424 
 425 /*
 426  * Unmount a single filesystem.
 427  */
 428 static int
 429 unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
 430 {
 431         int ret = umount2(mountpoint, flags);
 432         if (ret != 0) {
 433                 zfs_error_aux(hdl, strerror(errno));
 434                 return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
 435                     dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
 436                     mountpoint));
 437         }
 438 
 439         return (0);
 440 }
 441 
 442 /*
 443  * Unmount the given filesystem.
 444  */
 445 int
 446 zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
 447 {
 448         libzfs_handle_t *hdl = zhp->zfs_hdl;
 449         struct mnttab entry;
 450         char *mntpt = NULL;
 451 
 452         /* check to see if we need to unmount the filesystem */
 453         if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
 454             libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {
 455                 /*
 456                  * mountpoint may have come from a call to
 457                  * getmnt/getmntany if it isn't NULL. If it is NULL,
 458                  * we know it comes from libzfs_mnttab_find which can
 459                  * then get freed later. We strdup it to play it safe.
 460                  */
 461                 if (mountpoint == NULL)
 462                         mntpt = zfs_strdup(hdl, entry.mnt_mountp);
 463                 else
 464                         mntpt = zfs_strdup(hdl, mountpoint);
 465 
 466                 /*
 467                  * Unshare and unmount the filesystem
 468                  */
 469                 if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0)
 470                         return (-1);
 471 
 472                 if (unmount_one(hdl, mntpt, flags) != 0) {
 473                         free(mntpt);
 474                         (void) zfs_shareall(zhp);
 475                         return (-1);
 476                 }
 477                 libzfs_mnttab_remove(hdl, zhp->zfs_name);
 478                 free(mntpt);
 479         }
 480 
 481         return (0);
 482 }
 483 
 484 /*
 485  * Unmount this filesystem and any children inheriting the mountpoint property.
 486  * To do this, just act like we're changing the mountpoint property, but don't
 487  * remount the filesystems afterwards.
 488  */
 489 int
 490 zfs_unmountall(zfs_handle_t *zhp, int flags)
 491 {
 492         prop_changelist_t *clp;
 493         int ret;
 494 
 495         clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, flags);
 496         if (clp == NULL)
 497                 return (-1);
 498 
 499         ret = changelist_prefix(clp);
 500         changelist_free(clp);
 501 
 502         return (ret);
 503 }
 504 
 505 boolean_t
 506 zfs_is_shared(zfs_handle_t *zhp)
 507 {
 508         zfs_share_type_t rc = 0;
 509         zfs_share_proto_t *curr_proto;
 510 
 511         if (ZFS_IS_VOLUME(zhp))
 512                 return (B_FALSE);
 513 
 514         for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
 515             curr_proto++)
 516                 rc |= zfs_is_shared_proto(zhp, NULL, *curr_proto);
 517 
 518         return (rc ? B_TRUE : B_FALSE);
 519 }
 520 
 521 int
 522 zfs_share(zfs_handle_t *zhp)
 523 {
 524         assert(!ZFS_IS_VOLUME(zhp));
 525         return (zfs_share_proto(zhp, share_all_proto));
 526 }
 527 
 528 int
 529 zfs_unshare(zfs_handle_t *zhp)
 530 {
 531         assert(!ZFS_IS_VOLUME(zhp));
 532         return (zfs_unshareall(zhp));
 533 }
 534 
 535 /*
 536  * Check to see if the filesystem is currently shared.
 537  */
 538 zfs_share_type_t
 539 zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto)
 540 {
 541         char *mountpoint;
 542         zfs_share_type_t rc;
 543 
 544         if (!zfs_is_mounted(zhp, &mountpoint))
 545                 return (SHARED_NOT_SHARED);
 546 
 547         if ((rc = is_shared(zhp->zfs_hdl, mountpoint, proto))
 548             != SHARED_NOT_SHARED) {
 549                 if (where != NULL)
 550                         *where = mountpoint;
 551                 else
 552                         free(mountpoint);
 553                 return (rc);
 554         } else {
 555                 free(mountpoint);
 556                 return (SHARED_NOT_SHARED);
 557         }
 558 }
 559 
 560 boolean_t
 561 zfs_is_shared_nfs(zfs_handle_t *zhp, char **where)
 562 {
 563         return (zfs_is_shared_proto(zhp, where,
 564             PROTO_NFS) != SHARED_NOT_SHARED);
 565 }
 566 
 567 boolean_t
 568 zfs_is_shared_smb(zfs_handle_t *zhp, char **where)
 569 {
 570         return (zfs_is_shared_proto(zhp, where,
 571             PROTO_SMB) != SHARED_NOT_SHARED);
 572 }
 573 
 574 /*
 575  * Make sure things will work if libshare isn't installed by using
 576  * wrapper functions that check to see that the pointers to functions
 577  * initialized in _zfs_init_libshare() are actually present.
 578  */
 579 
 580 static sa_handle_t (*_sa_init)(int);
 581 static sa_handle_t (*_sa_init_arg)(int, void *);
 582 static int (*_sa_service)(sa_handle_t);
 583 static void (*_sa_fini)(sa_handle_t);
 584 static sa_share_t (*_sa_find_share)(sa_handle_t, char *);
 585 static int (*_sa_enable_share)(sa_share_t, char *);
 586 static int (*_sa_disable_share)(sa_share_t, char *);
 587 static char *(*_sa_errorstr)(int);
 588 static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
 589 static boolean_t (*_sa_needs_refresh)(sa_handle_t *);
 590 static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t);
 591 static int (* _sa_get_zfs_share)(sa_handle_t, char *, zfs_handle_t *);
 592 static void (*_sa_update_sharetab_ts)(sa_handle_t);
 593 
 594 /*
 595  * _zfs_init_libshare()
 596  *
 597  * Find the libshare.so.1 entry points that we use here and save the
 598  * values to be used later. This is triggered by the runtime loader.
 599  * Make sure the correct ISA version is loaded.
 600  */
 601 
 602 #pragma init(_zfs_init_libshare)
 603 static void
 604 _zfs_init_libshare(void)
 605 {
 606         void *libshare;
 607         char path[MAXPATHLEN];
 608         char isa[MAXISALEN];
 609 
 610 #if defined(_LP64)
 611         if (sysinfo(SI_ARCHITECTURE_64, isa, MAXISALEN) == -1)
 612                 isa[0] = '\0';
 613 #else
 614         isa[0] = '\0';
 615 #endif
 616         (void) snprintf(path, MAXPATHLEN,
 617             "/usr/lib/%s/libshare.so.1", isa);
 618 
 619         if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) {
 620                 _sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init");
 621                 _sa_init_arg = (sa_handle_t (*)(int, void *))dlsym(libshare,
 622                     "sa_init_arg");
 623                 _sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini");
 624                 _sa_service = (int (*)(sa_handle_t))dlsym(libshare,
 625                     "sa_service");
 626                 _sa_find_share = (sa_share_t (*)(sa_handle_t, char *))
 627                     dlsym(libshare, "sa_find_share");
 628                 _sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
 629                     "sa_enable_share");
 630                 _sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
 631                     "sa_disable_share");
 632                 _sa_errorstr = (char *(*)(int))dlsym(libshare, "sa_errorstr");
 633                 _sa_parse_legacy_options = (int (*)(sa_group_t, char *, char *))
 634                     dlsym(libshare, "sa_parse_legacy_options");
 635                 _sa_needs_refresh = (boolean_t (*)(sa_handle_t *))
 636                     dlsym(libshare, "sa_needs_refresh");
 637                 _sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t))
 638                     dlsym(libshare, "sa_get_zfs_handle");
 639                 _sa_get_zfs_share = (int (*)(sa_handle_t, char *,
 640                     zfs_handle_t *)) dlsym(libshare, "sa_get_zfs_share");
 641                 _sa_update_sharetab_ts = (void (*)(sa_handle_t))
 642                     dlsym(libshare, "sa_update_sharetab_ts");
 643                 if (_sa_init == NULL || _sa_init_arg == NULL ||
 644                     _sa_fini == NULL || _sa_find_share == NULL ||
 645                     _sa_enable_share == NULL || _sa_disable_share == NULL ||
 646                     _sa_errorstr == NULL || _sa_parse_legacy_options == NULL ||
 647                     _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL ||
 648                     _sa_get_zfs_share == NULL || _sa_service == NULL ||
 649                     _sa_update_sharetab_ts == NULL) {
 650                         _sa_init = NULL;
 651                         _sa_init_arg = NULL;
 652                         _sa_service = NULL;
 653                         _sa_fini = NULL;
 654                         _sa_disable_share = NULL;
 655                         _sa_enable_share = NULL;
 656                         _sa_errorstr = NULL;
 657                         _sa_parse_legacy_options = NULL;
 658                         (void) dlclose(libshare);
 659                         _sa_needs_refresh = NULL;
 660                         _sa_get_zfs_handle = NULL;
 661                         _sa_get_zfs_share = NULL;
 662                         _sa_update_sharetab_ts = NULL;
 663                 }
 664         }
 665 }
 666 
 667 /*
 668  * zfs_init_libshare(zhandle, service)
 669  *
 670  * Initialize the libshare API if it hasn't already been initialized.
 671  * In all cases it returns 0 if it succeeded and an error if not. The
 672  * service value is which part(s) of the API to initialize and is a
 673  * direct map to the libshare sa_init(service) interface.
 674  */
 675 static int
 676 zfs_init_libshare_impl(libzfs_handle_t *zhandle, int service, void *arg)
 677 {
 678         /*
 679          * libshare is either not installed or we're in a branded zone. The
 680          * rest of the wrapper functions around the libshare calls already
 681          * handle NULL function pointers, but we don't want the callers of
 682          * zfs_init_libshare() to fail prematurely if libshare is not available.
 683          */
 684         if (_sa_init == NULL)
 685                 return (SA_OK);
 686 
 687         /*
 688          * Attempt to refresh libshare. This is necessary if there was a cache
 689          * miss for a new ZFS dataset that was just created, or if state of the
 690          * sharetab file has changed since libshare was last initialized. We
 691          * want to make sure so check timestamps to see if a different process
 692          * has updated any of the configuration. If there was some non-ZFS
 693          * change, we need to re-initialize the internal cache.
 694          */
 695         if (_sa_needs_refresh != NULL &&
 696             _sa_needs_refresh(zhandle->libzfs_sharehdl)) {
 697                 zfs_uninit_libshare(zhandle);
 698                 zhandle->libzfs_sharehdl = _sa_init_arg(service, arg);
 699         }
 700 
 701         if (zhandle && zhandle->libzfs_sharehdl == NULL)
 702                 zhandle->libzfs_sharehdl = _sa_init_arg(service, arg);
 703 
 704         if (zhandle->libzfs_sharehdl == NULL)
 705                 return (SA_NO_MEMORY);
 706 
 707         return (SA_OK);
 708 }
 709 int
 710 zfs_init_libshare(libzfs_handle_t *zhandle, int service)
 711 {
 712         return (zfs_init_libshare_impl(zhandle, service, NULL));
 713 }
 714 
 715 int
 716 zfs_init_libshare_arg(libzfs_handle_t *zhandle, int service, void *arg)
 717 {
 718         return (zfs_init_libshare_impl(zhandle, service, arg));
 719 }
 720 
 721 
 722 /*
 723  * zfs_uninit_libshare(zhandle)
 724  *
 725  * Uninitialize the libshare API if it hasn't already been
 726  * uninitialized. It is OK to call multiple times.
 727  */
 728 void
 729 zfs_uninit_libshare(libzfs_handle_t *zhandle)
 730 {
 731         if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) {
 732                 if (_sa_fini != NULL)
 733                         _sa_fini(zhandle->libzfs_sharehdl);
 734                 zhandle->libzfs_sharehdl = NULL;
 735         }
 736 }
 737 
 738 /*
 739  * zfs_parse_options(options, proto)
 740  *
 741  * Call the legacy parse interface to get the protocol specific
 742  * options using the NULL arg to indicate that this is a "parse" only.
 743  */
 744 int
 745 zfs_parse_options(char *options, zfs_share_proto_t proto)
 746 {
 747         if (_sa_parse_legacy_options != NULL) {
 748                 return (_sa_parse_legacy_options(NULL, options,
 749                     proto_table[proto].p_name));
 750         }
 751         return (SA_CONFIG_ERR);
 752 }
 753 
 754 /*
 755  * zfs_sa_find_share(handle, path)
 756  *
 757  * wrapper around sa_find_share to find a share path in the
 758  * configuration.
 759  */
 760 static sa_share_t
 761 zfs_sa_find_share(sa_handle_t handle, char *path)
 762 {
 763         if (_sa_find_share != NULL)
 764                 return (_sa_find_share(handle, path));
 765         return (NULL);
 766 }
 767 
 768 /*
 769  * zfs_sa_enable_share(share, proto)
 770  *
 771  * Wrapper for sa_enable_share which enables a share for a specified
 772  * protocol.
 773  */
 774 static int
 775 zfs_sa_enable_share(sa_share_t share, char *proto)
 776 {
 777         if (_sa_enable_share != NULL)
 778                 return (_sa_enable_share(share, proto));
 779         return (SA_CONFIG_ERR);
 780 }
 781 
 782 /*
 783  * zfs_sa_disable_share(share, proto)
 784  *
 785  * Wrapper for sa_enable_share which disables a share for a specified
 786  * protocol.
 787  */
 788 static int
 789 zfs_sa_disable_share(sa_share_t share, char *proto)
 790 {
 791         if (_sa_disable_share != NULL)
 792                 return (_sa_disable_share(share, proto));
 793         return (SA_CONFIG_ERR);
 794 }
 795 
 796 /*
 797  * Share the given filesystem according to the options in the specified
 798  * protocol specific properties (sharenfs, sharesmb).  We rely
 799  * on "libshare" to do the dirty work for us.
 800  */
 801 static int
 802 zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
 803 {
 804         char mountpoint[ZFS_MAXPROPLEN];
 805         char shareopts[ZFS_MAXPROPLEN];
 806         char sourcestr[ZFS_MAXPROPLEN];
 807         libzfs_handle_t *hdl = zhp->zfs_hdl;
 808         sa_share_t share;
 809         zfs_share_proto_t *curr_proto;
 810         zprop_source_t sourcetype;
 811         int service = SA_INIT_ONE_SHARE_FROM_HANDLE;
 812         int ret;
 813 
 814         if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
 815                 return (0);
 816 
 817         /*
 818          * Function may be called in a loop from higher up stack, with libshare
 819          * initialized for multiple shares (SA_INIT_SHARE_API_SELECTIVE).
 820          * zfs_init_libshare_arg will refresh the handle's cache if necessary.
 821          * In this case we do not want to switch to per share initialization.
 822          * Specify SA_INIT_SHARE_API to do full refresh, if refresh required.
 823          */
 824         if ((hdl->libzfs_sharehdl != NULL) && (_sa_service != NULL) &&
 825             (_sa_service(hdl->libzfs_sharehdl) ==
 826             SA_INIT_SHARE_API_SELECTIVE)) {
 827                 service = SA_INIT_SHARE_API;
 828         }
 829 
 830         for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
 831                 /*
 832                  * Return success if there are no share options.
 833                  */
 834                 if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
 835                     shareopts, sizeof (shareopts), &sourcetype, sourcestr,
 836                     ZFS_MAXPROPLEN, B_FALSE) != 0 ||
 837                     strcmp(shareopts, "off") == 0)
 838                         continue;
 839                 ret = zfs_init_libshare_arg(hdl, service, zhp);
 840                 if (ret != SA_OK) {
 841                         (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
 842                             dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
 843                             zfs_get_name(zhp), _sa_errorstr != NULL ?
 844                             _sa_errorstr(ret) : "");
 845                         return (-1);
 846                 }
 847 
 848                 share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
 849                 if (share == NULL) {
 850                         /*
 851                          * This may be a new file system that was just
 852                          * created so isn't in the internal cache.
 853                          * Rather than reloading the entire configuration,
 854                          * we can add just this one share to the cache.
 855                          */
 856                         if ((_sa_get_zfs_share == NULL) ||
 857                             (_sa_get_zfs_share(hdl->libzfs_sharehdl, "zfs", zhp)
 858                             != SA_OK)) {
 859                                 (void) zfs_error_fmt(hdl,
 860                                     proto_table[*curr_proto].p_share_err,
 861                                     dgettext(TEXT_DOMAIN, "cannot share '%s'"),
 862                                     zfs_get_name(zhp));
 863                                 return (-1);
 864                         }
 865                         share = zfs_sa_find_share(hdl->libzfs_sharehdl,
 866                             mountpoint);
 867                 }
 868                 if (share != NULL) {
 869                         int err;
 870                         err = zfs_sa_enable_share(share,
 871                             proto_table[*curr_proto].p_name);
 872                         if (err != SA_OK) {
 873                                 (void) zfs_error_fmt(hdl,
 874                                     proto_table[*curr_proto].p_share_err,
 875                                     dgettext(TEXT_DOMAIN, "cannot share '%s'"),
 876                                     zfs_get_name(zhp));
 877                                 return (-1);
 878                         }
 879                 } else {
 880                         (void) zfs_error_fmt(hdl,
 881                             proto_table[*curr_proto].p_share_err,
 882                             dgettext(TEXT_DOMAIN, "cannot share '%s'"),
 883                             zfs_get_name(zhp));
 884                         return (-1);
 885                 }
 886 
 887         }
 888         return (0);
 889 }
 890 
 891 
 892 int
 893 zfs_share_nfs(zfs_handle_t *zhp)
 894 {
 895         return (zfs_share_proto(zhp, nfs_only));
 896 }
 897 
 898 int
 899 zfs_share_smb(zfs_handle_t *zhp)
 900 {
 901         return (zfs_share_proto(zhp, smb_only));
 902 }
 903 
 904 int
 905 zfs_shareall(zfs_handle_t *zhp)
 906 {
 907         return (zfs_share_proto(zhp, share_all_proto));
 908 }
 909 
 910 /*
 911  * Unshare a filesystem by mountpoint.
 912  */
 913 static int
 914 unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
 915     zfs_share_proto_t proto)
 916 {
 917         sa_share_t share;
 918         int err;
 919         char *mntpt;
 920         int service = SA_INIT_ONE_SHARE_FROM_NAME;
 921 
 922         /*
 923          * Mountpoint could get trashed if libshare calls getmntany
 924          * which it does during API initialization, so strdup the
 925          * value.
 926          */
 927         mntpt = zfs_strdup(hdl, mountpoint);
 928 
 929         /*
 930          * Function may be called in a loop from higher up stack, with libshare
 931          * initialized for multiple shares (SA_INIT_SHARE_API_SELECTIVE).
 932          * zfs_init_libshare_arg will refresh the handle's cache if necessary.
 933          * In this case we do not want to switch to per share initialization.
 934          * Specify SA_INIT_SHARE_API to do full refresh, if refresh required.
 935          */
 936         if ((hdl->libzfs_sharehdl != NULL) && (_sa_service != NULL) &&
 937             (_sa_service(hdl->libzfs_sharehdl) ==
 938             SA_INIT_SHARE_API_SELECTIVE)) {
 939                 service = SA_INIT_SHARE_API;
 940         }
 941 
 942         err = zfs_init_libshare_arg(hdl, service, (void *)name);
 943         if (err != SA_OK) {
 944                 free(mntpt);    /* don't need the copy anymore */
 945                 return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
 946                     dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
 947                     name, _sa_errorstr(err)));
 948         }
 949 
 950         share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt);
 951         free(mntpt);    /* don't need the copy anymore */
 952 
 953         if (share != NULL) {
 954                 err = zfs_sa_disable_share(share, proto_table[proto].p_name);
 955                 if (err != SA_OK) {
 956                         return (zfs_error_fmt(hdl,
 957                             proto_table[proto].p_unshare_err,
 958                             dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
 959                             name, _sa_errorstr(err)));
 960                 }
 961         } else {
 962                 return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
 963                     dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"),
 964                     name));
 965         }
 966         return (0);
 967 }
 968 
 969 /*
 970  * Unshare the given filesystem.
 971  */
 972 int
 973 zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
 974     zfs_share_proto_t *proto)
 975 {
 976         libzfs_handle_t *hdl = zhp->zfs_hdl;
 977         struct mnttab entry;
 978         char *mntpt = NULL;
 979 
 980         /* check to see if need to unmount the filesystem */
 981         rewind(zhp->zfs_hdl->libzfs_mnttab);
 982         if (mountpoint != NULL)
 983                 mountpoint = mntpt = zfs_strdup(hdl, mountpoint);
 984 
 985         if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
 986             libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {
 987                 zfs_share_proto_t *curr_proto;
 988 
 989                 if (mountpoint == NULL)
 990                         mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
 991 
 992                 for (curr_proto = proto; *curr_proto != PROTO_END;
 993                     curr_proto++) {
 994 
 995                         if (is_shared(hdl, mntpt, *curr_proto) &&
 996                             unshare_one(hdl, zhp->zfs_name,
 997                             mntpt, *curr_proto) != 0) {
 998                                 if (mntpt != NULL)
 999                                         free(mntpt);
1000                                 return (-1);
1001                         }
1002                 }
1003         }
1004         if (mntpt != NULL)
1005                 free(mntpt);
1006 
1007         return (0);
1008 }
1009 
1010 int
1011 zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint)
1012 {
1013         return (zfs_unshare_proto(zhp, mountpoint, nfs_only));
1014 }
1015 
1016 int
1017 zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint)
1018 {
1019         return (zfs_unshare_proto(zhp, mountpoint, smb_only));
1020 }
1021 
1022 /*
1023  * Same as zfs_unmountall(), but for NFS and SMB unshares.
1024  */
1025 int
1026 zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
1027 {
1028         prop_changelist_t *clp;
1029         int ret;
1030 
1031         clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
1032         if (clp == NULL)
1033                 return (-1);
1034 
1035         ret = changelist_unshare(clp, proto);
1036         changelist_free(clp);
1037 
1038         return (ret);
1039 }
1040 
1041 int
1042 zfs_unshareall_nfs(zfs_handle_t *zhp)
1043 {
1044         return (zfs_unshareall_proto(zhp, nfs_only));
1045 }
1046 
1047 int
1048 zfs_unshareall_smb(zfs_handle_t *zhp)
1049 {
1050         return (zfs_unshareall_proto(zhp, smb_only));
1051 }
1052 
1053 int
1054 zfs_unshareall(zfs_handle_t *zhp)
1055 {
1056         return (zfs_unshareall_proto(zhp, share_all_proto));
1057 }
1058 
1059 int
1060 zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint)
1061 {
1062         return (zfs_unshare_proto(zhp, mountpoint, share_all_proto));
1063 }
1064 
1065 /*
1066  * Remove the mountpoint associated with the current dataset, if necessary.
1067  * We only remove the underlying directory if:
1068  *
1069  *      - The mountpoint is not 'none' or 'legacy'
1070  *      - The mountpoint is non-empty
1071  *      - The mountpoint is the default or inherited
1072  *      - The 'zoned' property is set, or we're in a local zone
1073  *
1074  * Any other directories we leave alone.
1075  */
1076 void
1077 remove_mountpoint(zfs_handle_t *zhp)
1078 {
1079         char mountpoint[ZFS_MAXPROPLEN];
1080         zprop_source_t source;
1081 
1082         if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
1083             &source))
1084                 return;
1085 
1086         if (source == ZPROP_SRC_DEFAULT ||
1087             source == ZPROP_SRC_INHERITED) {
1088                 /*
1089                  * Try to remove the directory, silently ignoring any errors.
1090                  * The filesystem may have since been removed or moved around,
1091                  * and this error isn't really useful to the administrator in
1092                  * any way.
1093                  */
1094                 (void) rmdir(mountpoint);
1095         }
1096 }
1097 
1098 void
1099 libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
1100 {
1101         if (cbp->cb_alloc == cbp->cb_used) {
1102                 size_t newsz;
1103                 void *ptr;
1104 
1105                 newsz = cbp->cb_alloc ? cbp->cb_alloc * 2 : 64;
1106                 ptr = zfs_realloc(zhp->zfs_hdl,
1107                     cbp->cb_handles, cbp->cb_alloc * sizeof (void *),
1108                     newsz * sizeof (void *));
1109                 cbp->cb_handles = ptr;
1110                 cbp->cb_alloc = newsz;
1111         }
1112         cbp->cb_handles[cbp->cb_used++] = zhp;
1113 }
1114 
1115 static int
1116 mount_cb(zfs_handle_t *zhp, void *data)
1117 {
1118         get_all_cb_t *cbp = data;
1119 
1120         if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {
1121                 zfs_close(zhp);
1122                 return (0);
1123         }
1124 
1125         if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
1126                 zfs_close(zhp);
1127                 return (0);
1128         }
1129 
1130         /*
1131          * If this filesystem is inconsistent and has a receive resume
1132          * token, we can not mount it.
1133          */
1134         if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
1135             zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
1136             NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
1137                 zfs_close(zhp);
1138                 return (0);
1139         }
1140 
1141         libzfs_add_handle(cbp, zhp);
1142         if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) {
1143                 zfs_close(zhp);
1144                 return (-1);
1145         }
1146         return (0);
1147 }
1148 
1149 int
1150 libzfs_dataset_cmp(const void *a, const void *b)
1151 {
1152         zfs_handle_t **za = (zfs_handle_t **)a;
1153         zfs_handle_t **zb = (zfs_handle_t **)b;
1154         char mounta[MAXPATHLEN];
1155         char mountb[MAXPATHLEN];
1156         boolean_t gota, gotb;
1157 
1158         if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
1159                 verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
1160                     sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
1161         if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
1162                 verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
1163                     sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
1164 
1165         if (gota && gotb)
1166                 return (strcmp(mounta, mountb));
1167 
1168         if (gota)
1169                 return (-1);
1170         if (gotb)
1171                 return (1);
1172 
1173         return (strcmp(zfs_get_name(a), zfs_get_name(b)));
1174 }
1175 
/*
 * qsort() comparator for an array of C strings ('a' and 'b' point at array
 * elements).  The strcmp() operands are swapped, so the array ends up in
 * descending order.
 */
static int
mountpoint_compare(const void *a, const void *b)
{
        char * const *lhs = a;
        char * const *rhs = b;

        return (strcmp(*rhs, *lhs));
}
1184 
/*
 * Life cycle of a mount_task_t.  task_next_stage() moves a task forward by
 * incrementing its state, so the order of the enumerators matters.
 */
typedef enum {
        TASK_TO_PROCESS = 0,    /* not picked up by any worker yet */
        TASK_IN_PROCESSING = 1, /* claimed by a worker */
        TASK_DONE = 2,          /* worker finished (with or without error) */
        TASK_MAX = 3            /* number of states; never a valid state */
} task_state_t;
1191 
1192 typedef struct mount_task {
1193         const char      *mp;
1194         zfs_handle_t    *zh;
1195         task_state_t    state;
1196         int             error;
1197 } mount_task_t;
1198 
1199 typedef struct mount_task_q {
1200         pthread_mutex_t q_lock;
1201         libzfs_handle_t *hdl;
1202         const char      *mntopts;
1203         const char      *error_mp;
1204         zfs_handle_t    *error_zh;
1205         int             error;
1206         int             q_length;
1207         int             n_tasks;
1208         int             flags;
1209         mount_task_t    task[1];
1210 } mount_task_q_t;
1211 
1212 static int
1213 mount_task_q_init(int argc, zfs_handle_t **handles, const char *mntopts,
1214     int flags, mount_task_q_t **task)
1215 {
1216         mount_task_q_t *task_q;
1217         int i, error;
1218         size_t task_q_size;
1219 
1220         *task = NULL;
1221         /* nothing to do ? should not be here */
1222         if (argc <= 0)
1223                 return (EINVAL);
1224 
1225         /* allocate and init task_q */
1226         task_q_size = sizeof (mount_task_q_t) +
1227             (argc - 1) * sizeof (mount_task_t);
1228         task_q = calloc(task_q_size, 1);
1229         if (task_q == NULL)
1230                 return (ENOMEM);
1231 
1232         if ((error = pthread_mutex_init(&task_q->q_lock, NULL)) != 0) {
1233                 free(task_q);
1234                 return (error);
1235         }
1236         task_q->q_length = argc;
1237         task_q->n_tasks = argc;
1238         task_q->flags = flags;
1239         task_q->mntopts = mntopts;
1240 
1241         /* we are not going to change the strings, so no need to strdup */
1242         for (i = 0; i < argc; ++i) {
1243                 task_q->task[i].zh = handles[i];
1244                 task_q->task[i].state = TASK_TO_PROCESS;
1245                 task_q->error = 0;
1246         }
1247 
1248         *task = task_q;
1249         return (0);
1250 }
1251 
1252 static int
1253 umount_task_q_init(int argc, const char **argv, int flags,
1254     libzfs_handle_t *hdl, mount_task_q_t **task)
1255 {
1256         mount_task_q_t *task_q;
1257         int i, error;
1258         size_t task_q_size;
1259 
1260         *task = NULL;
1261         /* nothing to do ? should not be here */
1262         if (argc <= 0)
1263                 return (EINVAL);
1264 
1265         /* allocate and init task_q */
1266         task_q_size = sizeof (mount_task_q_t) +
1267             (argc - 1) * sizeof (mount_task_t);
1268         task_q = calloc(task_q_size, 1);
1269         if (task_q == NULL)
1270                 return (ENOMEM);
1271 
1272         if ((error = pthread_mutex_init(&task_q->q_lock, NULL)) != 0) {
1273                 free(task_q);
1274                 return (error);
1275         }
1276         task_q->hdl = hdl;
1277         task_q->q_length = argc;
1278         task_q->n_tasks = argc;
1279         task_q->flags = flags;
1280 
1281         /* we are not going to change the strings, so no need to strdup */
1282         for (i = 0; i < argc; ++i) {
1283                 task_q->task[i].mp = argv[i];
1284                 task_q->task[i].state = TASK_TO_PROCESS;
1285                 task_q->error = 0;
1286         }
1287 
1288         *task = task_q;
1289         return (0);
1290 }
1291 
1292 static void
1293 mount_task_q_fini(mount_task_q_t *task_q)
1294 {
1295         assert(task_q != NULL);
1296         (void) pthread_mutex_destroy(&task_q->q_lock);
1297         free(task_q);
1298 }
1299 
/*
 * Return 1 if the path or dataset name 's1' lies strictly below 's2', and 0
 * otherwise.  's1' is a child when 's2' is a prefix of it and the prefix
 * ends on a component boundary: either the next character of 's1' is '/',
 * or 's2' itself ends in '/' (e.g. the root mountpoint "/") and 's1'
 * continues past it.  A string is never its own child.
 */
static int
is_child_of(const char *s1, const char *s2)
{
        const char *parent = s2;

        /* skip the common prefix */
        while (*s1 != '\0' && *s2 != '\0' && *s1 == *s2) {
                ++s1;
                ++s2;
        }

        /* s2 must have been consumed entirely */
        if (*s2 != '\0')
                return (0);

        if (*s1 == '/')
                return (1);

        /* parent ending in '/': anything that follows is below it */
        return (s2 != parent && s2[-1] == '/' && *s1 != '\0');
}
1307 
1308 static boolean_t
1309 task_completed(int ind, mount_task_q_t *task_q)
1310 {
1311         return (task_q->task[ind].state == TASK_DONE);
1312 }
1313 
1314 static boolean_t
1315 task_to_process(int ind, mount_task_q_t *task_q)
1316 {
1317         return (task_q->task[ind].state == TASK_TO_PROCESS);
1318 }
1319 
1320 static boolean_t
1321 task_in_processing(int ind, mount_task_q_t *task_q)
1322 {
1323         return (task_q->task[ind].state == TASK_IN_PROCESSING);
1324 }
1325 
1326 static void
1327 task_next_stage(int ind, mount_task_q_t *task_q)
1328 {
1329         /* our state machine is a pipeline */
1330         task_q->task[ind].state++;
1331         assert(task_q->task[ind].state < TASK_MAX);
1332 }
1333 
1334 static boolean_t
1335 task_state_valid(int ind, mount_task_q_t *task_q)
1336 {
1337         /* our state machine is a pipeline */
1338         return (task_q->task[ind].state < TASK_MAX);
1339 }
1340 
1341 static boolean_t
1342 child_umount_pending(int ind, mount_task_q_t *task_q)
1343 {
1344         int i;
1345         for (i = ind-1; i >= 0; --i) {
1346                 assert(task_state_valid(i, task_q));
1347                 if ((task_q->task[i].state != TASK_DONE) &&
1348                     is_child_of(task_q->task[i].mp, task_q->task[ind].mp))
1349                         return (B_TRUE);
1350         }
1351 
1352         return (B_FALSE);
1353 }
1354 
1355 static boolean_t
1356 parent_mount_pending(int ind, mount_task_q_t *task_q)
1357 {
1358         int i;
1359         for (i = ind-1; i >= 0; --i) {
1360                 assert(task_state_valid(i, task_q));
1361                 if ((task_q->task[i].state != TASK_DONE) &&
1362                     is_child_of(task_q->task[ind].zh->zfs_name,
1363                     task_q->task[i].zh->zfs_name))
1364                         return (B_TRUE);
1365         }
1366 
1367         return (B_FALSE);
1368 }
1369 
1370 static void
1371 unmounter(void *arg)
1372 {
1373         mount_task_q_t *task_q = (mount_task_q_t *)arg;
1374         int error = 0, done = 0;
1375 
1376         assert(task_q != NULL);
1377         if (task_q == NULL)
1378                 return;
1379 
1380         while (!error && !done) {
1381                 mount_task_t *task;
1382                 int i, t, umount_err, flags, q_error;
1383 
1384                 if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
1385                         break; /* Out of while() loop */
1386 
1387                 if (task_q->error || task_q->n_tasks == 0) {
1388                         (void) pthread_mutex_unlock(&task_q->q_lock);
1389                         break; /* Out of while() loop */
1390                 }
1391 
1392                 /* Find task ready for processing */
1393                 for (i = 0, task = NULL, t = -1; i < task_q->q_length; ++i) {
1394                         if (task_q->error) {
1395                                 /* Fatal error, stop processing */
1396                                 done = 1;
1397                                 break; /* Out of for() loop */
1398                         }
1399 
1400                         if (task_completed(i, task_q))
1401                                 continue; /* for() loop */
1402 
1403                         if (task_to_process(i, task_q)) {
1404                                 /*
1405                                  * Cannot umount if some children are still
1406                                  * mounted; come back later
1407                                  */
1408                                 if ((child_umount_pending(i, task_q)))
1409                                         continue; /* for() loop */
1410                                 /* Should be OK to unmount now */
1411                                 task_next_stage(i, task_q);
1412                                 task = &task_q->task[i];
1413                                 t = i;
1414                                 break; /* Out of for() loop */
1415                         }
1416 
1417                         /* Otherwise, the task is already in processing */
1418                         assert(task_in_processing(i, task_q));
1419                 }
1420 
1421                 flags = task_q->flags;
1422 
1423                 error = pthread_mutex_unlock(&task_q->q_lock);
1424 
1425                 if (done || (task == NULL) || error || task_q->error)
1426                         break; /* Out of while() loop */
1427 
1428                 umount_err = umount2(task->mp, flags);
1429                 q_error = errno;
1430 
1431                 if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
1432                         break; /* Out of while() loop */
1433 
1434                 /* done processing */
1435                 assert(t >= 0 && t < task_q->q_length);
1436                 task_next_stage(t, task_q);
1437                 assert(task_completed(t, task_q));
1438                 task_q->n_tasks--;
1439 
1440                 if (umount_err) {
1441                         /*
1442                          * umount2() failed, cannot be busy because of mounted
1443                          * children - we have checked above, so it is fatal
1444                          */
1445                         assert(child_umount_pending(t, task_q) == B_FALSE);
1446                         task->error = q_error;
1447                         if (!task_q->error) {
1448                                 task_q->error = task->error;
1449                                 task_q->error_mp = task->mp;
1450                         }
1451                         done = 1;
1452                 }
1453 
1454                 if ((error = pthread_mutex_unlock(&task_q->q_lock)) != 0)
1455                         break; /* Out of while() loop */
1456         }
1457 }
1458 
1459 static void
1460 mounter(void *arg)
1461 {
1462         mount_task_q_t *task_q = (mount_task_q_t *)arg;
1463         int error = 0, done = 0;
1464 
1465         assert(task_q != NULL);
1466         if (task_q == NULL)
1467                 return;
1468 
1469         while (!error && !done) {
1470                 mount_task_t *task;
1471                 int i, t, mount_err, flags, q_error;
1472                 const char *mntopts;
1473 
1474                 if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
1475                         break; /* Out of while() loop */
1476 
1477                 if (task_q->error || task_q->n_tasks == 0) {
1478                         (void) pthread_mutex_unlock(&task_q->q_lock);
1479                         break; /* Out of while() loop */
1480                 }
1481 
1482                 /* Find task ready for processing */
1483                 for (i = 0, task = NULL, t = -1; i < task_q->q_length; ++i) {
1484                         if (task_q->error) {
1485                                 /* Fatal error, stop processing */
1486                                 done = 1;
1487                                 break; /* Out of for() loop */
1488                         }
1489 
1490                         if (task_completed(i, task_q))
1491                                 continue; /* for() loop */
1492 
1493                         if (task_to_process(i, task_q)) {
1494                                 /*
1495                                  * Cannot mount if some parents are not
1496                                  * mounted yet; come back later
1497                                  */
1498                                 if ((parent_mount_pending(i, task_q)))
1499                                         continue; /* for() loop */
1500                                 /* Should be OK to mount now */
1501                                 task_next_stage(i, task_q);
1502                                 task = &task_q->task[i];
1503                                 t = i;
1504                                 break; /* Out of for() loop */
1505                         }
1506 
1507                         /* Otherwise, the task is already in processing */
1508                         assert(task_in_processing(i, task_q));
1509                 }
1510 
1511                 flags = task_q->flags;
1512                 mntopts = task_q->mntopts;
1513 
1514                 error = pthread_mutex_unlock(&task_q->q_lock);
1515 
1516                 if (done || (task == NULL) || error || task_q->error)
1517                         break; /* Out of while() loop */
1518 
1519                 mount_err = zfs_mount(task->zh, mntopts, flags);
1520                 q_error = errno;
1521 
1522                 if ((error = pthread_mutex_lock(&task_q->q_lock)) != 0)
1523                         break; /* Out of while() loop */
1524 
1525                 /* done processing */
1526                 assert(t >= 0 && t < task_q->q_length);
1527                 task_next_stage(t, task_q);
1528                 assert(task_completed(t, task_q));
1529                 task_q->n_tasks--;
1530 
1531                 if (mount_err) {
1532                         task->error = q_error;
1533                         if (!task_q->error) {
1534                                 task_q->error = task->error;
1535                                 task_q->error_zh = task->zh;
1536                         }
1537                         done = 1;
1538                 }
1539 
1540                 if ((error = pthread_mutex_unlock(&task_q->q_lock)) != 0)
1541                         break; /* Out of while() loop */
1542         }
1543 }
1544 
1545 #define THREADS_HARD_LIMIT      128
1546 int parallel_unmount(libzfs_handle_t *hdl, int argc, const char **argv,
1547     int flags, int n_threads)
1548 {
1549         mount_task_q_t *task_queue = NULL;
1550         int             i, error;
1551         tpool_t         *t;
1552 
1553         if (argc == 0)
1554                 return (0);
1555 
1556         if ((error = umount_task_q_init(argc, argv, flags, hdl, &task_queue))
1557             != 0) {
1558                 assert(task_queue == NULL);
1559                 return (error);
1560         }
1561 
1562         if (n_threads > argc)
1563                 n_threads = argc;
1564 
1565         if (n_threads > THREADS_HARD_LIMIT)
1566                 n_threads = THREADS_HARD_LIMIT;
1567 
1568         t = tpool_create(1, n_threads, 0, NULL);
1569 
1570         for (i = 0; i < n_threads; ++i)
1571                 (void) tpool_dispatch(t, unmounter, task_queue);
1572 
1573         tpool_wait(t);
1574         tpool_destroy(t);
1575 
1576         if (task_queue->error) {
1577                 /*
1578                  * Tell ZFS!
1579                  */
1580                 zfs_error_aux(hdl,
1581                     strerror(error ? error : task_queue->error));
1582                 error = zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
1583                     dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
1584                     error ? "datasets" : task_queue->error_mp);
1585         }
1586         if (task_queue)
1587                 mount_task_q_fini(task_queue);
1588 
1589         return (error);
1590 }
1591 
1592 int parallel_mount(get_all_cb_t *cb, int *good, const char *mntopts,
1593     int flags, int n_threads)
1594 {
1595         int             i, error = 0;
1596         mount_task_q_t  *task_queue = NULL;
1597         tpool_t         *t;
1598 
1599         if (cb->cb_used == 0)
1600                 return (0);
1601 
1602         if (n_threads > cb->cb_used)
1603                 n_threads = cb->cb_used;
1604 
1605         if ((error = mount_task_q_init(cb->cb_used, cb->cb_handles,
1606             mntopts, flags, &task_queue)) != 0) {
1607                 assert(task_queue == NULL);
1608                 return (error);
1609         }
1610 
1611         t = tpool_create(1, n_threads, 0, NULL);
1612 
1613         for (i = 0; i < n_threads; ++i)
1614                 (void) tpool_dispatch(t, mounter, task_queue);
1615 
1616         tpool_wait(t);
1617         for (i = 0; i < cb->cb_used; ++i) {
1618                 good[i] = !task_queue->task[i].error;
1619                 if (!good[i]) {
1620                         zfs_handle_t *hdl = task_queue->error_zh;
1621                         zfs_error_aux(hdl->zfs_hdl,
1622                             strerror(task_queue->task[i].error));
1623                         (void) zfs_error_fmt(hdl->zfs_hdl, EZFS_MOUNTFAILED,
1624                             dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
1625                             task_queue->task[i].zh->zfs_name);
1626                 }
1627         }
1628         tpool_destroy(t);
1629 
1630         if (task_queue->error) {
1631                 zfs_handle_t *hdl = task_queue->error_zh;
1632                 /*
1633                  * Tell ZFS!
1634                  */
1635                 zfs_error_aux(hdl->zfs_hdl,
1636                     strerror(error ? error : task_queue->error));
1637                 error = zfs_error_fmt(hdl->zfs_hdl, EZFS_MOUNTFAILED,
1638                     dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
1639                     error ? "datasets" : hdl->zfs_name);
1640         }
1641         if (task_queue)
1642                 mount_task_q_fini(task_queue);
1643 
1644         return (error);
1645 }
1646 
1647 int
1648 zpool_enable_datasets_ex(zpool_handle_t *zhp, const char *mntopts, int flags,
1649     int n_threads)
1650 {
1651         get_all_cb_t cb = { 0 };
1652         libzfs_handle_t *hdl = zhp->zpool_hdl;
1653         zfs_handle_t *zfsp;
1654         int i, ret = -1;
1655         int *good;
1656         sa_init_selective_arg_t sharearg;
1657 
1658         /*
1659          * Gather all non-snap datasets within the pool.
1660          */
1661         if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL)
1662                 goto out;
1663 
1664         libzfs_add_handle(&cb, zfsp);
1665         if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0)
1666                 goto out;
1667         /*
1668          * Sort the datasets by mountpoint.
1669          */
1670         qsort(cb.cb_handles, cb.cb_used, sizeof (void *),
1671             libzfs_dataset_cmp);
1672 
1673         /*
1674          * And mount all the datasets, keeping track of which ones
1675          * succeeded or failed.
1676          */
1677         if ((good = zfs_alloc(zhp->zpool_hdl,
1678             cb.cb_used * sizeof (int))) == NULL)
1679                 goto out;
1680 
1681         ret = 0;
1682         if (n_threads < 2) {
1683                 for (i = 0; i < cb.cb_used; i++) {
1684                         if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0)
1685                                 ret = -1;
1686                         else
1687                                 good[i] = 1;
1688                 }
1689         } else {
1690                 ret = parallel_mount(&cb, good, mntopts, flags, n_threads);
1691         }
1692 
1693         /*
1694          * Initilialize libshare SA_INIT_SHARE_API_SELECTIVE here
1695          * to avoid unneccesary load/unload of the libshare API
1696          * per shared dataset downstream.
1697          */
1698         sharearg.zhandle_arr = cb.cb_handles;
1699         sharearg.zhandle_len = cb.cb_used;
1700         ret = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE,
1701             &sharearg);
1702         if (ret != 0) {
1703                 free(good);
1704                 goto out;
1705         }
1706 
1707         /*
1708          * Then share all the ones that need to be shared. This needs
1709          * to be a separate pass in order to avoid excessive reloading
1710          * of the configuration. Good should never be NULL since
1711          * zfs_alloc is supposed to exit if memory isn't available.
1712          */
1713         for (i = 0; i < cb.cb_used; i++) {
1714                 if (good[i] && zfs_share(cb.cb_handles[i]) != 0)
1715                         ret = -1;
1716         }
1717 
1718         free(good);
1719 
1720 out:
1721         for (i = 0; i < cb.cb_used; i++)
1722                 zfs_close(cb.cb_handles[i]);
1723         free(cb.cb_handles);
1724 
1725         return (ret);
1726 }
1727 
1728 int
1729 zpool_disable_datasets_ex(zpool_handle_t *zhp, boolean_t force, int n_threads)
1730 {
1731         int used, alloc;
1732         struct mnttab entry;
1733         size_t namelen;
1734         char **mountpoints = NULL;
1735         zfs_handle_t **datasets = NULL;
1736         libzfs_handle_t *hdl = zhp->zpool_hdl;
1737         int i;
1738         int ret = -1;
1739         int flags = (force ? MS_FORCE : 0);
1740         sa_init_selective_arg_t sharearg;
1741 
1742         namelen = strlen(zhp->zpool_name);
1743 
1744         rewind(hdl->libzfs_mnttab);
1745         used = alloc = 0;
1746         while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
1747                 /*
1748                  * Ignore non-ZFS entries.
1749                  */
1750                 if (entry.mnt_fstype == NULL ||
1751                     strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
1752                         continue;
1753 
1754                 /*
1755                  * Ignore filesystems not within this pool.
1756                  */
1757                 if (entry.mnt_mountp == NULL ||
1758                     strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
1759                     (entry.mnt_special[namelen] != '/' &&
1760                     entry.mnt_special[namelen] != '\0'))
1761                         continue;
1762 
1763                 /*
1764                  * At this point we've found a filesystem within our pool.  Add
1765                  * it to our growing list.
1766                  */
1767                 if (used == alloc) {
1768                         if (alloc == 0) {
1769                                 if ((mountpoints = zfs_alloc(hdl,
1770                                     8 * sizeof (void *))) == NULL)
1771                                         goto out;
1772 
1773                                 if ((datasets = zfs_alloc(hdl,
1774                                     8 * sizeof (void *))) == NULL)
1775                                         goto out;
1776 
1777                                 alloc = 8;
1778                         } else {
1779                                 void *ptr;
1780 
1781                                 if ((ptr = zfs_realloc(hdl, mountpoints,
1782                                     alloc * sizeof (void *),
1783                                     alloc * 2 * sizeof (void *))) == NULL)
1784                                         goto out;
1785                                 mountpoints = ptr;
1786 
1787                                 if ((ptr = zfs_realloc(hdl, datasets,
1788                                     alloc * sizeof (void *),
1789                                     alloc * 2 * sizeof (void *))) == NULL)
1790                                         goto out;
1791                                 datasets = ptr;
1792 
1793                                 alloc *= 2;
1794                         }
1795                 }
1796 
1797                 if ((mountpoints[used] = zfs_strdup(hdl,
1798                     entry.mnt_mountp)) == NULL)
1799                         goto out;
1800 
1801                 /*
1802                  * This is allowed to fail, in case there is some I/O error.  It
1803                  * is only used to determine if we need to remove the underlying
1804                  * mountpoint, so failure is not fatal.
1805                  */
1806                 datasets[used] = make_dataset_handle(hdl, entry.mnt_special);
1807 
1808                 used++;
1809         }
1810 
1811         /*
1812          * At this point, we have the entire list of filesystems, so sort it by
1813          * mountpoint.
1814          */
1815         sharearg.zhandle_arr = datasets;
1816         sharearg.zhandle_len = used;
1817         ret = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE,
1818             &sharearg);
1819         if (ret != 0)
1820                 goto out;
1821         qsort(mountpoints, used, sizeof (char *), mountpoint_compare);
1822 
1823         /*
1824          * Walk through and first unshare everything.
1825          */
1826         for (i = 0; i < used; i++) {
1827                 zfs_share_proto_t *curr_proto;
1828                 for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
1829                     curr_proto++) {
1830                         if (is_shared(hdl, mountpoints[i], *curr_proto) &&
1831                             unshare_one(hdl, mountpoints[i], mountpoints[i],
1832                             *curr_proto) != 0)
1833                                 goto out;
1834                 }
1835         }
1836 
1837         /*
1838          * Now unmount everything, removing the underlying directories as
1839          * appropriate.
1840          */
1841         if (n_threads < 2) {
1842                 for (i = 0; i < used; i++) {
1843                         if (unmount_one(hdl, mountpoints[i], flags) != 0)
1844                                 goto out;
1845                 }
1846         } else {
1847                 if (parallel_unmount(hdl, used, (const char **)mountpoints,
1848                     flags, n_threads) != 0)
1849                         goto out;
1850         }
1851         for (i = 0; i < used; i++) {
1852                 if (datasets[i])
1853                         remove_mountpoint(datasets[i]);
1854         }
1855         ret = 0;
1856 out:
1857         for (i = 0; i < used; i++) {
1858                 if (datasets[i])
1859                         zfs_close(datasets[i]);
1860                 free(mountpoints[i]);
1861         }
1862         free(datasets);
1863         free(mountpoints);
1864 
1865         return (ret);
1866 }
1867 
1868 /*
1869  * Mount and share all datasets within the given pool.  This assumes that no
1870  * datasets within the pool are currently mounted.  Because users can create
1871  * complicated nested hierarchies of mountpoints, we first gather all the
1872  * datasets and mountpoints within the pool, and sort them by mountpoint.  Once
1873  * we have the list of all filesystems, we iterate over them in order and mount
1874  * and/or share each one.
1875  */
1876 #pragma weak zpool_mount_datasets = zpool_enable_datasets
1877 int
1878 zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
1879 {
1880         return (zpool_enable_datasets_ex(zhp, mntopts, flags, 1));
1881 }
1882 
1883 /* alias for 2002/240 */
1884 #pragma weak zpool_unmount_datasets = zpool_disable_datasets
1885 /*
1886  * Unshare and unmount all datasets within the given pool.  We don't want to
1887  * rely on traversing the DSL to discover the filesystems within the pool,
1888  * because this may be expensive (if not all of them are mounted), and can fail
1889  * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
1890  * gather all the filesystems that are currently mounted.
1891  */
1892 int
1893 zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
1894 {
1895         return (zpool_disable_datasets_ex(zhp, force, 1));
1896 }